This is an R markdown document illustrating the bioinformatic methods used to generate the results for the manuscript by Arsenault et al. entitled "Simple inheritance, complex regulation: supergene-mediated fire ant queen polymorphism". If you have any questions about this document, please direct them to Sam Arsenault at samarsenault93@gmail.com or Brendan G. Hunt at brendan.hunt@gmail.com
library(edgeR)
library(UpSetR)
library(readr)
library(magick)
library(gridExtra)
library(karyoploteR)
library(cowplot)
library(ggplot2)
library(topGO)
library(data.table)
library(eulerr)
library(pheatmap)
library(CircStats)
library(RColorBrewer)
library(randtests)
library(readxl)
library(rtracklayer)
library(rentrez)
library(dplyr)
make_polar_DEG <- function(TT_table,radial_lim) {
## Things to Add:
library(ggplot2)
# library(gridExtra)
library(cowplot)
internal <- TT_table
internal$neglogFDR <- -log10(TT_table$FDR)
internal$angle <- atan2(TT_table$logFC.GenotypeBb,TT_table$logFC.Genotypebb)
internal$abslogBb <- abs(TT_table$logFC.GenotypeBb)
internal$abslogbb <- abs(TT_table$logFC.Genotypebb)
internal$length <- internal[,c("abslogBb","abslogbb")][cbind(seq_len(nrow(internal)), max.col(internal[,c("abslogBb","abslogbb")]))]
d=data.frame(angle=c(atan2(1,2),atan2(1,1),atan2(2,1),atan2(1,0),atan2(1,-1),atan2(0,-1),atan2(-1,-2),atan2(-1,-1),atan2(-2,-1),atan2(-1,0),atan2(-1,1),atan2(0,1)),
category=c("bb>Bb>BB", "bb=Bb>BB", "Bb>bb>BB", "Bb>BB=bb", "Bb>BB>bb", "Bb=BB>bb","BB>Bb>bb","BB>Bb=bb","BB>bb>Bb","BB=bb>Bb","bb>BB>Bb","bb>Bb=BB"))
gg_scatter <- ggplot() +
geom_point(data=internal, aes(x=angle, y=neglogFDR),shape=1) +
theme_bw() +
theme(axis.text.x=element_blank()) +
coord_polar(theta="x",clip="off") +
scale_x_continuous(breaks = seq(-pi, pi, pi/3), limits = c(-pi, pi)) +
scale_y_continuous(breaks = seq(0,radial_lim,5),limits = c(0,radial_lim)) +
geom_vline(xintercept = c(atan2(1,2),atan2(1,1),atan2(2,1),atan2(1,0),atan2(1,-1),atan2(0,-1),atan2(-1,-2),atan2(-1,-1),atan2(-2,-1),atan2(-1,0),atan2(-1,1),atan2(0,1))) +
geom_histogram(d=internal,aes(x=angle,y=10*..ncount..),fill="green", alpha=0.3,bins=60) +
geom_text(data=d, mapping=aes(x=angle, y=radial_lim, label=category), size=4, angle=0, vjust=0, hjust=0.5)
return(gg_scatter)
} ## Make the Polar coordinate Dominance Plot
make_volcano <- function(et,Title) {
library(ggplot2)
resFilt <- topTags(et, n=nrow(et$table))
volcanoData <- cbind(resFilt$table$logFC, -log10(resFilt$table$FDR))
colnames(volcanoData) <- c("logFC", "negLogPval")
volcanoData <- data.frame(volcanoData)
volcanoData$Sig <- volcanoData$negLogPval > log10(0.01)*(-1)
x_bounds = round(max(abs(volcanoData$logFC)))
y_bound = round(volcanoData$negLogPval)
# first set up the plot
volcPlot <- ggplot(volcanoData,aes(logFC,negLogPval))
# then add the points
volcPlot <- volcPlot + geom_point(aes(colour = Sig),alpha=0.4, size=2)
# volcPlot <- volcPlot + geom_hline(yintercept = log10(0.01)*(-1),size=1) + geom_vline(xintercept = -1,size=1) + geom_vline(xintercept = 1,size=1) + geom_vline(xintercept = 0,size=1) +
volcPlot <- volcPlot + geom_hline(yintercept = log10(0.01)*(-1),size=1) + geom_vline(xintercept = 0,size=1) +
labs(x = "logFC", y = "-log10(P-value)",title=Title) +
theme(legend.position="none", axis.title.x = element_text(face="bold", size=16), axis.text.x = element_text(size=12), axis.title.y = element_text(face="bold", size=16), axis.text.y = element_text(size=12)) +
scale_x_continuous(limits=c((-1)*x_bounds,x_bounds),breaks=c((-1)*x_bounds,-1,0,1,x_bounds)) +
# scale_y_continuous(limits=c(0,y_bound)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5))
test <- topTags(et,p.value = 0.01,n=100000)
length(test$table$logFC)
LO <- sum(test$table$logFC < (-1))
LI <- sum((test$table$logFC < 0)&(test$table$logFC >= (-1)))
RI <- sum((test$table$logFC <= 1)&(test$table$logFC > 0))
RO <- sum(test$table$logFC > 1)
print(LO)
print(LI)
print(RI)
print(RO)
return(volcPlot)
} ## Create a standardized volcano plot for a given edgeR QLFtest output.
generate_GO_table <- function(data,Name) {
QLF_TT_all <- topTags(data,p.value = 1,n=Inf)$table
QLF_TT_sig <- topTags(data,p.value = 0.01,n=Inf)$table
## Load in custom annotation
sinv_GO <- readMappings("~/Desktop/Sinv_Analyses/NCBI_genome/NCBI_lg_gene.annot.map", sep = "\t", IDsep=",")
## Load Specific Gene List
myInterestingGenes <- row.names(QLF_TT_sig)
## Load Full Gene Set
geneNames <- row.names(QLF_TT_all)
## Create geneList Structure
geneList <- factor(as.integer(geneNames %in% myInterestingGenes))
names(geneList) <- geneNames
## Prep for file output
output_file = Name
### Run for BP
## Laod the data into a topGO data object (BP,MF,CC)
GOData <- new("topGOdata",ontology="BP",allGenes=geneList,description="GO analysis of DEGs with q<0.01", annot = annFUN.gene2GO, gene2GO = sinv_GO)
## Compute GO term enrichment
resultClassic <- runTest(GOData, algorithm="classic", statistic="fisher")
resultElim <- runTest(GOData, algorithm="elim", statistic="fisher")
resultTopgo <- runTest(GOData, algorithm="weight01", statistic="fisher")
allGO = usedGO(object = GOData)
allRes <- GenTable(GOData, classicFisher = resultClassic, elimFisher = resultElim, topgoFisher = resultTopgo, orderBy = "topgoFisher", ranksOf = "topgoFisher", topNodes = length(allGO))
aa <- sapply(unname(sapply(allRes$GO.ID, function(x) {genes<-genesInTerm(GOData, x); genes[[1]][sapply(genes[[1]],function(id) id %in% myInterestingGenes)]})),paste0,collapse=",")
allRes$genes <- aa
res_print <- allRes[(allRes$topgoFisher<=0.05),c(1:5,8)]
output_file4 = paste(output_file,"TopGO_BP_sig.pdf", sep="_")
pdf(file = output_file4,width = 10)
plot.new()
grid.table(res_print,rows=NULL)
dev.off()
GOData <- new("topGOdata",ontology="MF",allGenes=geneList,description="GO analysis of DEGs with q<0.01", annot = annFUN.gene2GO, gene2GO = sinv_GO)
## Compute GO term enrichment
resultClassic <- runTest(GOData, algorithm="classic", statistic="fisher")
resultElim <- runTest(GOData, algorithm="elim", statistic="fisher")
resultTopgo <- runTest(GOData, algorithm="weight01", statistic="fisher")
allGO = usedGO(object = GOData)
allRes <- GenTable(GOData, classicFisher = resultClassic, elimFisher = resultElim, topgoFisher = resultTopgo, orderBy = "topgoFisher", ranksOf = "topgoFisher", topNodes = length(allGO))
aa <- sapply(unname(sapply(allRes$GO.ID, function(x) {genes<-genesInTerm(GOData, x); genes[[1]][sapply(genes[[1]],function(id) id %in% myInterestingGenes)]})),paste0,collapse=",")
allRes$genes <- aa
res_print <- allRes[(allRes$topgoFisher<=0.05),c(1:5,8)]
output_file4 = paste(output_file,"TopGO_MF_sig.pdf", sep="_")
pdf(file = output_file4,width = 10)
plot.new()
grid.table(res_print,rows=NULL)
dev.off()
GOData <- new("topGOdata",ontology="CC",allGenes=geneList,description="GO analysis of DEGs with q<0.01", annot = annFUN.gene2GO, gene2GO = sinv_GO)
## Compute GO term enrichment
resultClassic <- runTest(GOData, algorithm="classic", statistic="fisher")
resultElim <- runTest(GOData, algorithm="elim", statistic="fisher")
resultTopgo <- runTest(GOData, algorithm="weight01", statistic="fisher")
allGO = usedGO(object = GOData)
allRes <- GenTable(GOData, classicFisher = resultClassic, elimFisher = resultElim, topgoFisher = resultTopgo, orderBy = "topgoFisher", ranksOf = "topgoFisher", topNodes = length(allGO))
aa <- sapply(unname(sapply(allRes$GO.ID, function(x) {genes<-genesInTerm(GOData, x); genes[[1]][sapply(genes[[1]],function(id) id %in% myInterestingGenes)]})),paste0,collapse=",")
allRes$genes <- aa
res_print <- allRes[(allRes$topgoFisher<=0.05),c(1:5,8)]
output_file4 = paste(output_file,"TopGO_CC_sig.pdf", sep="_")
pdf(file = output_file4,width = 10)
plot.new()
grid.table(res_print,rows=NULL)
dev.off()
return()
} ## Compute and Print tables of GO term enrichment values
convert_LOC <- function(dataframe) {
library(rtracklayer)
library(rentrez)
Si_gnG_Ensembl <- readGFF(file="~/Downloads/Solenopsis_invicta.Si_gnG.41.gff3")
Si_gnG_refseq <- readGFF(file="/Users/samarsenault/Desktop/Sinv_Analyses/NCBI_genome/GCF_000188075.1_Si_gnG/GCF_000188075.1_Si_gnG_genomic.gff")
refseq_translation <- data.frame(Si_gnG_refseq[(Si_gnG_refseq$type=="gene")|(Si_gnG_refseq$type=="pseudogene"),c("ID","Name")])
Ensembl_translation <- data.frame(Si_gnG_Ensembl[(Si_gnG_Ensembl$type=="gene")|(Si_gnG_Ensembl$type=="pseudogene"),c("ID","description")])
Ensembl_translation$ID <- gsub("gene:", "", Ensembl_translation$ID)
merged_translation <- merge(x=refseq_translation,y=Ensembl_translation,by.x="Name",by.y="ID")
g <- merge(x=dataframe,y=merged_translation,by.y="ID",by.x=0,all.x=TRUE)
for (i in 1:nrow(g)){
if (!is.na(g$Name[i])&is.na(g$description[i])){
search_term <- gsub(pattern = "LOC",replacement = "",x = g$Name[i])
g$description[i] <- entrez_summary(db="gene",id = search_term)[3]
}
}
return(g)
} ## Add descriptions and proper gene names (LOC) to the data.frame
generate_heatmap_frame <- function(gene_names){
library(karyoploteR)
temp <- Sinv_annot[(row.names(Sinv_annot) %in% gene_names),]
temp.sig<-temp[(grepl("lg",temp$chr)),]
temp.sig <- temp.sig[complete.cases(temp.sig), ]
DEG_GR <- makeGRangesFromDataFrame(temp.sig, seqnames.field = "chr",start.field = "start",end.field = "end",keep.extra.columns = TRUE)
windows <- unlist(tileGenome(stats::setNames(kp$chromosome.lengths, kp$chromosomes),tilewidth = 2e6))
dens <- countOverlaps(windows, DEG_GR)
values(windows) <- data.frame(y = dens)
return(windows)
} ## Takes a list of gene names and makes a heatmap object to pass to karyoploteR
generate_heatmap_frame_SNP <- function(SNP_data){
library(karyoploteR)
DEG_GR <- makeGRangesFromDataFrame(SNP_data, seqnames.field = "lg",start.field = "newstart",end.field = "newend",keep.extra.columns = TRUE)
windows <- unlist(tileGenome(stats::setNames(kp$chromosome.lengths, kp$chromosomes),tilewidth = 500000))
dens <- countOverlaps(windows, DEG_GR)
values(windows) <- data.frame(y = dens)
return(windows)
} ## Takes a list of SNPs and makes a heatmap object to pass to karyoploteR
check_supergene_presence_bias <- function(all_genes_TT_table){
all_genes_TT_table_merge <- merge(all_genes_TT_table,Sinv_annot_lg,by.x=0,by.y="X4",all.x=TRUE)
supergene_genes_int <- all_genes_TT_table_merge[((all_genes_TT_table_merge$X1=="lg16")&(all_genes_TT_table_merge$X2>=7311525)&(all_genes_TT_table_merge$X3<=18123987)),c("Row.names")]
supergene_genes_int <- supergene_genes_int[!is.na(supergene_genes_int)]
nonsuper_genes_int <- all_genes_TT_table_merge[((grepl(pattern = "lg",x = all_genes_TT_table_merge$X1))&(all_genes_TT_table_merge$Row.names %!in% supergene_genes_int)),c("Row.names")]
in_super_sig <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% supergene_genes_int)&(all_genes_TT_table$FDR<=0.01)),]
in_super_not <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% supergene_genes_int)&(all_genes_TT_table$FDR>0.01)),]
out_super_sig <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% nonsuper_genes_int)&(all_genes_TT_table$FDR<=0.01)),]
out_super_not <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% nonsuper_genes_int)&(all_genes_TT_table$FDR>0.01)),]
out <- c(nrow(in_super_sig),nrow(in_super_not),nrow(out_super_sig),nrow(out_super_not),nrow(all_genes_TT_table))
print(out[1:4])
print(sum(out[1:4])-out[5])
chi_out <- chisq.test(matrix(out[1:4],nrow=2),correct = FALSE)
print(chi_out$observed)
print(chi_out$expected)
print(chi_out$p.value)
print(fisher.test(x=matrix(out[1:4],nrow=2),alternative = "two.sided"))
return()
} ## Takes a full TopTags file (all p-values) and checks for an overrepresentation of significant DEGs in the supergene as opposed to outside of it.
check_supergene_dir_bias <- function(sig_genes_TT_table){
sig_genes_TT_table_merge <- merge(sig_genes_TT_table,Sinv_annot_lg,by.x=0,by.y="X4",all.x=TRUE)
supergene_genes_int <- sig_genes_TT_table_merge[((sig_genes_TT_table_merge$X1=="lg16")&(sig_genes_TT_table_merge$X2>=7311525)&(sig_genes_TT_table_merge$X3<=18123987)),c("Row.names")]
supergene_genes_int <- supergene_genes_int[!is.na(supergene_genes_int)]
nonsuper_genes_int <- sig_genes_TT_table_merge[((grepl(pattern = "lg",x = sig_genes_TT_table_merge$X1))&(sig_genes_TT_table_merge$Row.names %!in% supergene_genes_int)),c("Row.names")]
in_super_up <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% supergene_genes_int)&(sig_genes_TT_table$logFC>0)),]
in_super_down <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% supergene_genes_int)&(sig_genes_TT_table$logFC<0)),]
out_super_up <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% nonsuper_genes_int)&(sig_genes_TT_table$logFC>0)),]
out_super_down <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% nonsuper_genes_int)&(sig_genes_TT_table$logFC<0)),]
out <- c(nrow(in_super_up),nrow(in_super_down),nrow(out_super_up),nrow(out_super_down),nrow(sig_genes_TT_table))
print(out[1:4])
print(sum(out[1:4])-out[5])
chi_out <- chisq.test(matrix(out[1:4],nrow=2),correct = FALSE)
print(chi_out$observed)
print(chi_out$expected)
print(chi_out$p.value)
print(fisher.test(x=matrix(out[1:4],nrow=2),alternative = "two.sided"))
return()
} ## Takes a table of significant DEGs and assesses whether or not there is a directional bias of differential expression within versus outside of the supergene.
perform_runs_test_sig <- function(all_TT){
add_pos <- merge(all_TT$table,Sinv_annot_lg,by.x=0,by.y="X4")
supergene_bits <- add_pos[((add_pos$X1 == "lg16")&(add_pos$X2>=7311525)&(add_pos$X3<=18123987)),]
supergene_order <- supergene_bits[order(supergene_bits[,'X2']),]
supergene_order_sig <- supergene_order[(supergene_order$FDR<=0.01),]
runs.test(supergene_order$FDR,plot=T,threshold = 0.01)
#runs.test(supergene_order_sig$logFC,plot=T,threshold = 0)
}
perform_runs_test_dir <- function(all_TT){
add_pos <- merge(all_TT$table,Sinv_annot_lg,by.x=0,by.y="X4")
supergene_bits <- add_pos[((add_pos$X1 == "lg16")&(add_pos$X2>=7311525)&(add_pos$X3<=18123987)),]
supergene_order <- supergene_bits[order(supergene_bits[,'X2']),]
supergene_order_sig <- supergene_order[(supergene_order$FDR<=0.01),]
#runs.test(supergene_order$FDR,plot=T,threshold = 0.01)
runs.test(supergene_order_sig$logFC,plot=T,threshold = 0)
}
'%!in%' <- function(x,y)!('%in%'(x,y)) ## Quality of life function
## Load in the gene lengths computed from the annotation for RPKM calculation
len_file=read.delim(file="/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_NCBI_counts/Genelength.tsv",sep="\t")
head(len_file)
## GeneID Length
## 1 gene0 1839
## 2 gene1 894
## 3 gene2 2253
## 4 gene3 870
## 5 gene4 76
## 6 gene5 972
## Load in the counts file generated using the 2-pass method from STAR
x <- read.delim("/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_NCBI_counts/NCBI_All_counts.tsv",row.names=1,sep="\t")
head(x)
## lib_104AB lib_104AO lib_104DB lib_104GB lib_104GO lib_107AB lib_107AO
## gene0 0 27 0 9 27 0 24
## gene1 0 0 0 0 0 0 0
## gene2 1 16 0 0 7 0 24
## gene3 23142 2727 21787 22937 2385 10573 3340
## gene4 0 0 0 0 0 0 0
## gene5 14 4 0 8 4 4 3
## lib_107EB lib_107EO lib_107GB lib_107GO lib_15AB lib_15AO lib_15DB
## gene0 0 9 0 10 0 10 0
## gene1 0 0 0 0 0 0 0
## gene2 29 10 88 8 41 4 5
## gene3 15382 4155 17307 4557 6960 7256 5436
## gene4 0 0 0 0 0 0 0
## gene5 15 1 20 0 12 0 2
## lib_15DO lib_15GB lib_15GO lib_16CB lib_16CO lib_16DB lib_16DO lib_16GB
## gene0 20 0 0 0 0 0 21 0
## gene1 0 0 0 0 0 0 0 0
## gene2 14 0 16 27 25 0 9 0
## gene3 3270 15016 3305 1937 2446 7587 2199 6104
## gene4 0 0 0 0 0 0 0 0
## gene5 1 2 1 34 0 25 0 6
## lib_16GO lib_19AB lib_19AO lib_19DB lib_19DO lib_19GB lib_19GO lib_1EB
## gene0 3 0 8 0 0 0 6 2
## gene1 0 0 0 0 0 0 0 0
## gene2 11 20 3 38 8 3 12 5
## gene3 3032 13182 3015 12381 2358 13280 7304 904
## gene4 0 0 0 0 0 0 0 0
## gene5 0 25 1 82 0 25 0 48
## lib_1EO lib_207AB lib_207AO lib_209AB lib_209AO lib_20AB lib_20AO
## gene0 57 42 5 0 0 5 13
## gene1 0 0 0 0 0 0 0
## gene2 15 71 2 41 9 86 11
## gene3 1416 11555 3013 16333 4266 9281 3669
## gene4 0 0 0 0 0 0 0
## gene5 3 73 0 24 0 35 0
## lib_20DB lib_20DO lib_20IB lib_20IO lib_222AB lib_222AO lib_232AB
## gene0 0 2 0 14 0 5 0
## gene1 0 0 0 0 0 0 0
## gene2 24 11 2 11 26 8 44
## gene3 24916 4310 17208 4228 31449 3370 10125
## gene4 0 0 0 0 0 0 0
## gene5 19 0 23 0 13 0 9
## lib_232AO lib_233AB lib_233AO lib_235AB lib_235AO lib_239AB lib_239AO
## gene0 4 0 4 0 0 0 4
## gene1 0 0 0 0 0 0 0
## gene2 10 0 4 24 5 0 9
## gene3 3138 12777 3220 12547 5417 25319 4123
## gene4 0 0 0 0 0 0 0
## gene5 0 36 0 20 0 1 0
## lib_240AB lib_240AO lib_30AB lib_30AO lib_30EB lib_30EO lib_30GB lib_30GO
## gene0 0 0 2 13 0 3 0 1
## gene1 0 0 0 0 0 0 0 0
## gene2 0 5 267 2 5 8 89 8
## gene3 17585 3988 5901 2905 7458 4653 11744 2184
## gene4 0 0 0 0 0 0 0 0
## gene5 15 0 6 0 11 0 12 0
## lib_5AB lib_5AO lib_5GB lib_5GO
## gene0 0 9 10 8
## gene1 0 0 0 0
## gene2 44 11 15 10
## gene3 4405 3841 5980 2152
## gene4 0 0 0 0
## gene5 35 0 18 0
## Load in the model matrix containing information formatted for both pairiwse and GLM comparisons
adv_group <- read.delim("/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_model_matrix5.tsv",row.names = 1,sep = "\t")
head(adv_group)
## Individual_ID Colony_ID Tissue Social.Form Genotype Alt_ID
## 104AB 104A 104 Brain Polygyne Bb Brain.Polygyne.Bb
## 104AO 104A 104 Ovary Polygyne Bb Ovary.Polygyne.Bb
## 104DB 104D 104 Brain Polygyne BB Brain.Polygyne.BB
## 104GB 104G 104 Brain Polygyne bb Brain.Polygyne.bb
## 104GO 104G 104 Ovary Polygyne bb Ovary.Polygyne.bb
## 107AB 107A 107 Brain Polygyne Bb Brain.Polygyne.Bb
## Load in the annotation information from a bed file
Sinv_annot <- read_delim("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/NCBI_lg_gene_v3.bed",
"\t", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_double(),
## X3 = col_double(),
## X4 = col_character()
## )
row.names(Sinv_annot) <- Sinv_annot$X4 ## Rename the rows of the this variable with gene names
## Warning: Setting row names on a tibble is deprecated.
colnames(Sinv_annot) <- c("chr","start","end","gene") ## rename the columns with better description
head(Sinv_annot)
## # A tibble: 6 x 4
## chr start end gene
## <chr> <dbl> <dbl> <chr>
## 1 NC_014672.1 10229 11174 gene15723
## 2 NC_014672.1 14342 15323 gene15724
## 3 NC_014672.1 4157 4507 gene15717
## 4 NC_014672.1 4929 6596 gene15718
## 5 NC_014672.1 8466 8996 gene15721
## 6 NC_014672.1 9021 10142 gene15722
## Load in the linkage mapped genome annotations
Sinv_annot_lg <- read_delim("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/NCBI_lg_gene_v4.bed",
"\t", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_double(),
## X3 = col_double(),
## X4 = col_character()
## )
y_pw <- DGEList(x,group = adv_group$Alt_ID)
y_pw$samples
## group lib.size norm.factors
## lib_104AB Brain.Polygyne.Bb 23839271 1
## lib_104AO Ovary.Polygyne.Bb 21048826 1
## lib_104DB Brain.Polygyne.BB 23454386 1
## lib_104GB Brain.Polygyne.bb 24389394 1
## lib_104GO Ovary.Polygyne.bb 20495668 1
## lib_107AB Brain.Polygyne.Bb 20141694 1
## lib_107AO Ovary.Polygyne.Bb 17197423 1
## lib_107EB Brain.Polygyne.BB 21829149 1
## lib_107EO Ovary.Polygyne.BB 19294902 1
## lib_107GB Brain.Polygyne.bb 25168093 1
## lib_107GO Ovary.Polygyne.bb 16074944 1
## lib_15AB Brain.Polygyne.Bb 14960249 1
## lib_15AO Ovary.Polygyne.Bb 16752710 1
## lib_15DB Brain.Polygyne.BB 13227223 1
## lib_15DO Ovary.Polygyne.BB 19369746 1
## lib_15GB Brain.Polygyne.bb 15342322 1
## lib_15GO Ovary.Polygyne.bb 15462774 1
## lib_16CB Brain.Polygyne.Bb 10874244 1
## lib_16CO Ovary.Polygyne.Bb 18167411 1
## lib_16DB Brain.Polygyne.BB 18118617 1
## lib_16DO Ovary.Polygyne.BB 15154985 1
## lib_16GB Brain.Polygyne.bb 14252480 1
## lib_16GO Ovary.Polygyne.bb 11697881 1
## lib_19AB Brain.Polygyne.Bb 21454629 1
## lib_19AO Ovary.Polygyne.Bb 14055961 1
## lib_19DB Brain.Polygyne.BB 22242852 1
## lib_19DO Ovary.Polygyne.BB 12250091 1
## lib_19GB Brain.Polygyne.bb 22026200 1
## lib_19GO Ovary.Polygyne.bb 18419244 1
## lib_1EB Brain.Polygyne.BB 9465077 1
## lib_1EO Ovary.Polygyne.BB 15489624 1
## lib_207AB Brain.Monogyne.BB 23545677 1
## lib_207AO Ovary.Monogyne.BB 13189348 1
## lib_209AB Brain.Monogyne.BB 25094494 1
## lib_209AO Ovary.Monogyne.BB 18649952 1
## lib_20AB Brain.Polygyne.Bb 20937228 1
## lib_20AO Ovary.Polygyne.Bb 19166278 1
## lib_20DB Brain.Polygyne.BB 21474982 1
## lib_20DO Ovary.Polygyne.BB 20286421 1
## lib_20IB Brain.Polygyne.bb 23393220 1
## lib_20IO Ovary.Polygyne.bb 19737048 1
## lib_222AB Brain.Monogyne.BB 26950035 1
## lib_222AO Ovary.Monogyne.BB 18315290 1
## lib_232AB Brain.Monogyne.BB 15462514 1
## lib_232AO Ovary.Monogyne.BB 17487369 1
## lib_233AB Brain.Monogyne.BB 18101509 1
## lib_233AO Ovary.Monogyne.BB 16506089 1
## lib_235AB Brain.Monogyne.BB 17354499 1
## lib_235AO Ovary.Monogyne.BB 22630544 1
## lib_239AB Brain.Monogyne.BB 23630798 1
## lib_239AO Ovary.Monogyne.BB 19564509 1
## lib_240AB Brain.Monogyne.BB 19079343 1
## lib_240AO Ovary.Monogyne.BB 18214783 1
## lib_30AB Brain.Polygyne.Bb 18465236 1
## lib_30AO Ovary.Polygyne.Bb 13595740 1
## lib_30EB Brain.Polygyne.BB 13622038 1
## lib_30EO Ovary.Polygyne.BB 18007543 1
## lib_30GB Brain.Polygyne.bb 16763253 1
## lib_30GO Ovary.Polygyne.bb 10097662 1
## lib_5AB Brain.Polygyne.Bb 13352005 1
## lib_5AO Ovary.Polygyne.Bb 11809254 1
## lib_5GB Brain.Polygyne.bb 16318389 1
## lib_5GO Ovary.Polygyne.bb 11915595 1
## Filter out low count genes
keep <- rowSums(cpm(y_pw)>10/9) >= 8
len_file <- len_file[ as.vector(keep) , ]
y_pw <- y_pw[keep, , keep.lib.sizes=FALSE]
## Normalize samples by library size
y_pw <- calcNormFactors(y_pw)
Background.genes <- row.names(y_pw$counts)
## Calculate RPKM values
adv_group$FullID <- paste(adv_group$Individual_ID,adv_group$Alt_ID,sep = ".")
## Begin Computing Differential Expression
design_pw <- model.matrix(~0+adv_group$Alt_ID,data=y_pw$samples)
colnames(design_pw) <- levels(y_pw$samples$group)
y_pw <- estimateDisp(y_pw,design_pw)
y_pw <- estimateGLMCommonDisp(y_pw, design_pw)
y_pw <- estimateGLMTrendedDisp(y_pw, design_pw)
y_pw <- estimateGLMTagwiseDisp(y_pw, design_pw)
## QLM Fit and DE analysis
fit_pw <- glmQLFit(y_pw,design_pw)
my.contrasts <- makeContrasts(Brain_M_v_P_BB=Brain.Monogyne.BB-Brain.Polygyne.BB,
Brain_BB_v_Bb=Brain.Polygyne.BB-Brain.Polygyne.Bb,
Brain_Bb_v_bb=Brain.Polygyne.Bb-Brain.Polygyne.bb,
Brain_mBB_v_pBb=Brain.Monogyne.BB-Brain.Polygyne.Bb,
Brain_mBB_v_pbb=Brain.Monogyne.BB-Brain.Polygyne.bb,
Ovary_M_v_P_BB=Ovary.Monogyne.BB-Ovary.Polygyne.BB,
Ovary_BB_v_Bb=Ovary.Polygyne.BB-Ovary.Polygyne.Bb,
Ovary_Bb_v_bb=Ovary.Polygyne.Bb-Ovary.Polygyne.bb,
Ovary_mBB_v_pBb=Ovary.Monogyne.BB-Ovary.Polygyne.Bb,
Ovary_mBB_v_pbb=Ovary.Monogyne.BB-Ovary.Polygyne.bb,
Brain_v_Ovary_pBB=Brain.Polygyne.BB-Ovary.Polygyne.BB,
Brain_v_Ovary_mBB=Brain.Monogyne.BB-Ovary.Monogyne.BB,
Brain_v_Ovary_pBb=Brain.Polygyne.Bb-Ovary.Polygyne.Bb,
Brain_v_Ovary_pbb=Brain.Polygyne.bb-Ovary.Polygyne.bb,
Ovary_BB_v_bb=Ovary.Polygyne.BB-Ovary.Polygyne.bb,
Brain_BB_v_bb=Brain.Polygyne.BB-Brain.Polygyne.bb,
levels=design_pw)
pw.qlf.Brain_M_v_P_BB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_M_v_P_BB"])
pw.qlf.Brain_M_v_P_BB.TT <- topTags(pw.qlf.Brain_M_v_P_BB,p.value = 1,n=100000)
pw.qlf.Brain_M_v_P_BB.TT.sig <- topTags(pw.qlf.Brain_M_v_P_BB,p.value = 0.01,n=100000)$table
pw.qlf.Brain_M_v_P_BB.TT.names <- row.names(pw.qlf.Brain_M_v_P_BB.TT$table)
pw.qlf.Ovary_M_v_P_BB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_M_v_P_BB"])
pw.qlf.Ovary_M_v_P_BB.TT <- topTags(pw.qlf.Ovary_M_v_P_BB,p.value = 1,n=100000)
pw.qlf.Ovary_M_v_P_BB.TT.sig <- topTags(pw.qlf.Ovary_M_v_P_BB,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_M_v_P_BB.TT.names <- row.names(pw.qlf.Ovary_M_v_P_BB.TT$table)
pw.qlf.Brain_BB_v_Bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_BB_v_Bb"])
pw.qlf.Brain_BB_v_Bb.TT <- topTags(pw.qlf.Brain_BB_v_Bb,p.value = 1,n=100000)
pw.qlf.Brain_BB_v_Bb.TT.sig <- topTags(pw.qlf.Brain_BB_v_Bb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_BB_v_Bb.TT.names <- row.names(pw.qlf.Brain_BB_v_Bb.TT$table)
pw.qlf.Ovary_BB_v_Bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_BB_v_Bb"])
pw.qlf.Ovary_BB_v_Bb.TT <- topTags(pw.qlf.Ovary_BB_v_Bb,p.value = 1,n=100000)
pw.qlf.Ovary_BB_v_Bb.TT.sig <- topTags(pw.qlf.Ovary_BB_v_Bb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_BB_v_Bb.TT.names <- row.names(pw.qlf.Ovary_BB_v_Bb.TT$table)
pw.qlf.Brain_Bb_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_Bb_v_bb"])
pw.qlf.Brain_Bb_v_bb.TT <- topTags(pw.qlf.Brain_Bb_v_bb,p.value = 1,n=100000)
pw.qlf.Brain_Bb_v_bb.TT.sig <- topTags(pw.qlf.Brain_Bb_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_Bb_v_bb.TT.names <- row.names(pw.qlf.Brain_Bb_v_bb.TT$table)
pw.qlf.Ovary_Bb_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_Bb_v_bb"])
pw.qlf.Ovary_Bb_v_bb.TT <- topTags(pw.qlf.Ovary_Bb_v_bb,p.value = 1,n=100000)
pw.qlf.Ovary_Bb_v_bb.TT.sig <- topTags(pw.qlf.Ovary_Bb_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_Bb_v_bb.TT.names <- row.names(pw.qlf.Ovary_Bb_v_bb.TT$table)
pw.qlf.Brain_v_Ovary_pBB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_pBB"])
pw.qlf.Brain_v_Ovary_pBB.TT <- topTags(pw.qlf.Brain_v_Ovary_pBB,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_pBB.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_pBB,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_pBB.TT.names <- row.names(pw.qlf.Brain_v_Ovary_pBB.TT$table)
pw.qlf.Brain_v_Ovary_mBB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_mBB"])
pw.qlf.Brain_v_Ovary_mBB.TT <- topTags(pw.qlf.Brain_v_Ovary_mBB,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_mBB.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_mBB,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_mBB.TT.names <- row.names(pw.qlf.Brain_v_Ovary_mBB.TT$table)
pw.qlf.Brain_v_Ovary_pBb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_pBb"])
pw.qlf.Brain_v_Ovary_pBb.TT <- topTags(pw.qlf.Brain_v_Ovary_pBb,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_pBb.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_pBb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_pBb.TT.names <- row.names(pw.qlf.Brain_v_Ovary_pBb.TT$table)
pw.qlf.Brain_v_Ovary_pbb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_pbb"])
pw.qlf.Brain_v_Ovary_pbb.TT <- topTags(pw.qlf.Brain_v_Ovary_pbb,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_pbb.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_pbb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_pbb.TT.names <- row.names(pw.qlf.Brain_v_Ovary_pbb.TT$table)
pw.qlf.Brain_BB_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_BB_v_bb"])
pw.qlf.Brain_BB_v_bb.TT <- topTags(pw.qlf.Brain_BB_v_bb,p.value = 1,n=100000)
pw.qlf.Brain_BB_v_bb.TT.sig <- topTags(pw.qlf.Brain_BB_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_BB_v_bb.TT.names <- row.names(pw.qlf.Brain_BB_v_bb.TT$table)
pw.qlf.Ovary_BB_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_BB_v_bb"])
pw.qlf.Ovary_BB_v_bb.TT <- topTags(pw.qlf.Ovary_BB_v_bb,p.value = 1,n=100000)
pw.qlf.Ovary_BB_v_bb.TT.sig <- topTags(pw.qlf.Ovary_BB_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_BB_v_bb.TT.names <- row.names(pw.qlf.Ovary_BB_v_bb.TT$table)
pw.qlf.Ovary_mBB_v_pBb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_mBB_v_pBb"])
pw.qlf.Ovary_mBB_v_pBb.TT <- topTags(pw.qlf.Ovary_mBB_v_pBb,p.value = 1,n=100000)
pw.qlf.Ovary_mBB_v_pBb.TT.sig <- topTags(pw.qlf.Ovary_mBB_v_pBb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_mBB_v_pBb.TT.names <- row.names(pw.qlf.Ovary_mBB_v_pBb.TT$table)
pw.qlf.Ovary_mBB_v_pbb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_mBB_v_pbb"])
pw.qlf.Ovary_mBB_v_pbb.TT <- topTags(pw.qlf.Ovary_mBB_v_pbb,p.value = 1,n=100000)
pw.qlf.Ovary_mBB_v_pbb.TT.sig <- topTags(pw.qlf.Ovary_mBB_v_pbb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_mBB_v_pbb.TT.names <- row.names(pw.qlf.Ovary_mBB_v_pbb.TT$table)
pw.qlf.Brain_mBB_v_pBb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_mBB_v_pBb"])
pw.qlf.Brain_mBB_v_pBb.TT <- topTags(pw.qlf.Brain_mBB_v_pBb,p.value = 1,n=100000)
pw.qlf.Brain_mBB_v_pBb.TT.sig <- topTags(pw.qlf.Brain_mBB_v_pBb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_mBB_v_pBb.TT.names <- row.names(pw.qlf.Brain_mBB_v_pBb.TT$table)
pw.qlf.Brain_mBB_v_pbb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_mBB_v_pbb"])
pw.qlf.Brain_mBB_v_pbb.TT <- topTags(pw.qlf.Brain_mBB_v_pbb,p.value = 1,n=100000)
pw.qlf.Brain_mBB_v_pbb.TT.sig <- topTags(pw.qlf.Brain_mBB_v_pbb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_mBB_v_pbb.TT.names <- row.names(pw.qlf.Brain_mBB_v_pbb.TT$table)
## Generate Volcano Plots for the Pairiwse Comparisons
# Pairwise, genotype-specific comparisons (Figure S1,A-D)
make_volcano(pw.qlf.Brain_BB_v_Bb,"Brain SB/SB vs. SB/Sb")
## [1] 36
## [1] 1
## [1] 0
## [1] 5
make_volcano(pw.qlf.Brain_BB_v_bb,"Brain SB/SB vs. Sb/Sb")
## [1] 68
## [1] 6
## [1] 3
## [1] 67
## Warning: Removed 3 rows containing missing values (geom_point).
make_volcano(pw.qlf.Ovary_BB_v_Bb,"Ovary SB/SB vs. SB/Sb")
## [1] 40
## [1] 2
## [1] 1
## [1] 0
## Warning: Removed 1 rows containing missing values (geom_point).
make_volcano(pw.qlf.Ovary_BB_v_bb,"Ovary SB/SB vs. Sb/Sb")
## [1] 179
## [1] 20
## [1] 13
## [1] 28
## Warning: Removed 1 rows containing missing values (geom_point).
# Pairwise, social form comparisons (Embedded within Figure 3B,C)
make_volcano(pw.qlf.Brain_M_v_P_BB,"Brain Monogyne SB/SB vs. Polygyne SB/SB")
## [1] 13
## [1] 3
## [1] 1
## [1] 5
make_volcano(pw.qlf.Ovary_M_v_P_BB,"Ovary Monogyne SB/SB vs. Polygyne SB/SB")
## [1] 0
## [1] 0
## [1] 0
## [1] 0
# Pairwise, combinatorial-effect comparisons (Embedded within Figure 3B,C)
make_volcano(pw.qlf.Brain_mBB_v_pBb,"Brain Monogyne SB/SB vs. Polygyne SB/Sb")
## [1] 30
## [1] 0
## [1] 2
## [1] 4
## Warning: Removed 1 rows containing missing values (geom_point).
make_volcano(pw.qlf.Ovary_mBB_v_pBb,"Ovary Monogyne SB/SB vs. Polygyne SB/Sb")
## [1] 175
## [1] 14
## [1] 9
## [1] 7
## Warning: Removed 1 rows containing missing values (geom_point).
## Generate UpSetR plot of Pairwise comparisons (Figure 2C)
PW_set <- list(Brain_pBB_pBb=row.names(pw.qlf.Brain_BB_v_Bb.TT.sig),
Brain_pBB_pbb=row.names(pw.qlf.Brain_BB_v_bb.TT.sig),
Ovary_pBB_pBb=row.names(pw.qlf.Ovary_BB_v_Bb.TT.sig),
Ovary_pBB_pbb=row.names(pw.qlf.Ovary_BB_v_bb.TT.sig)
)
upset(fromList(PW_set),nsets=4,order.by = "freq")
## Generate Social Form Euler Plots (Figure 3B,C)
Tissue_set <- list(
Brain_mBBvpBB=row.names(pw.qlf.Brain_M_v_P_BB.TT.sig),
Ovary_mBBvpBB=row.names(pw.qlf.Ovary_M_v_P_BB.TT.sig),
Brain_pBBvpBb=row.names(pw.qlf.Brain_BB_v_Bb.TT.sig),
Ovary_pBBvpBb=row.names(pw.qlf.Ovary_BB_v_Bb.TT.sig),
Brain_mBBvpBb=row.names(pw.qlf.Brain_mBB_v_pBb.TT.sig),
Ovary_mBBvpBb=row.names(pw.qlf.Ovary_mBB_v_pBb.TT.sig)
)
brain_euler <- plot(euler(Tissue_set[c(1,3,5)], shape = "ellipse"), quantities = TRUE)
brain_euler
ovary_euler <- plot(euler(Tissue_set[c(2,4,6)], shape = "ellipse"), quantities = TRUE)
## Warning in colSums(id & !empty) == 0 | merged_sets: longer object length is not
## a multiple of shorter object length
ovary_euler
## Format model matrix
Tissue <- factor(adv_group$Tissue)
Social.Form <- factor(adv_group$Social.Form)
Genotype_temp <- factor(adv_group$Genotype)
Genotype2 <- relevel(Genotype_temp, ref = "BB")
## Describe the Experiment Design and define Contrasts
glm_design <- model.matrix(~0 + Social.Form + Tissue * Genotype2, data = adv_group)
colnames(glm_design) <- c("Monogyne", "Polygyne", "TissueOvary", "Genotypebb", "GenotypeBb",
"Tissuebb", "TissueBb")
y_glm <- DGEList(x, samples = glm_design)
## Filter out low count genes
y_glm <- y_glm[(rowSums(cpm(y_glm) > 10/9) >= 8), , keep.lib.sizes = FALSE] ## Based on the smallest number of aligned reads in our libraries as per edgeR documentation
### Normalize samples by library size
y_glm <- calcNormFactors(y_glm)
Background.genes <- row.names(y_glm$counts)
## Begin Computing Differential Expression
y_glm <- estimateDisp(y_glm, glm_design)
y_glm <- estimateGLMCommonDisp(y_glm, glm_design)
y_glm <- estimateGLMTrendedDisp(y_glm, glm_design)
y_glm <- estimateGLMTagwiseDisp(y_glm, glm_design)
## QLM Fit and DE analysis
fit_glm <- glmQLFit(y_glm, glm_design)
## Compute Differential expression based on number of supergene alleles DE
## Analysis for Paper
glm.QLF.BBvBb <- glmQLFTest(fit_glm, coef = "GenotypeBb")
glm.QLF.BBvBb_TT <- topTags(glm.QLF.BBvBb, n = Inf)
glm.QLF.BBvBb_TT_sig <- topTags(glm.QLF.BBvBb_TT, p.value = 0.01, n = Inf)$table
glm.QLF.BBvbb <- glmQLFTest(fit_glm, coef = "Genotypebb")
glm.QLF.BBvbb_TT <- topTags(glm.QLF.BBvbb, n = Inf)
glm.QLF.BBvbb_TT_sig <- topTags(glm.QLF.BBvbb_TT, p.value = 0.01, n = Inf)$table
glm.QLF.all <- glmQLFTest(fit_glm, coef = 4:5)
glm.QLF.all_TT <- topTags(glm.QLF.all, n = Inf)
glm.QLF.all_TT_sig <- topTags(glm.QLF.all_TT, p.value = 0.01, n = Inf)$table
## Compute differential expression based on social form
con.SF <- makeContrasts(Monogyne - Polygyne, levels = glm_design)
glm.QLF.SF <- glmQLFTest(fit_glm, contrast = con.SF)
glm.QLF.SF_TT <- topTags(glm.QLF.SF, n = Inf)
glm.QLF.SF_TT_sig <- topTags(glm.QLF.SF_TT, p.value = 0.01, n = Inf)$table
## Visualizations ## Note here that due to the way the contrasts are set up, the
## directionality of the volcano plots is flipped. This was rectified in the
## manuscript. BB vs. Bb Volcano Plot (Figure 2A)
make_volcano(glm.QLF.BBvBb, "GLM BB vs. Bb")
## [1] 2
## [1] 1
## [1] 1
## [1] 46
# BB vs. bb Volcano Plot (Figure 2B)
make_volcano(glm.QLF.BBvbb, "GLM BB vs. bb")
## [1] 49
## [1] 3
## [1] 10
## [1] 91
# Monogyne vs Polygyne Volcano Plot (Figure 3A)
make_volcano(glm.QLF.SF, "GLM Monogyne vs. Polygyne")
## [1] 235
## [1] 61
## [1] 26
## [1] 8
## GO Term Enrichment Analysis
generate_GO_table(pw.qlf.Brain_BB_v_Bb, "Brain_BBvBL")
## NULL
generate_GO_table(pw.qlf.Brain_BB_v_bb, "Brain_BBvLL")
## NULL
generate_GO_table(pw.qlf.Ovary_BB_v_Bb, "Ovary_BBvBL")
## NULL
generate_GO_table(pw.qlf.Ovary_BB_v_bb, "Ovary_BBvLL")
## NULL
generate_GO_table(glm.QLF.BBvBb, "GLM_BBvBL")
## NULL
generate_GO_table(glm.QLF.BBvbb, "GLM_BBvLL")
## NULL
generate_GO_table(glm.QLF.SF, "GLM_SF")
## NULL
## Add Tags for Candidate Genes (These genes were derived from overlapping the various differential expression analyses)
load(file="~/Desktop/Sinv_Analyses/Candidate_gene_names.RData")
Candidate_Genes <- c(Naming_Frame_sub$Row.names,"gene1476")
temp.sig1 <- Sinv_annot_lg[((Sinv_annot_lg$X4 %in% Candidate_Genes)&(grepl("lg",Sinv_annot_lg$X1))),]
temp.sig1 <- temp.sig1[complete.cases(temp.sig1), ]
row.names(temp.sig1) <- temp.sig1$X4
## Warning: Setting row names on a tibble is deprecated.
temp.sig1 <- convert_LOC(temp.sig1)
## Appended gene descriptions that were not included due to an upstream error
temp.sig1[5,"description"] <- "dopamine receptor 1 "
temp.sig1[10,"description"] <- "pheromone-binding protein Gp-9-like"
temp.sig1[11,"description"] <- "NADH dehydrogenase [ubiquinone] 1 alpha subcomplex subunit 9, mitochondrial-like"
temp.sig1[14,"description"] <- "putative nuclease HARBI1"
DEG_GR1 <- makeGRangesFromDataFrame(temp.sig1, seqnames.field = "X1",start.field = "X2",end.field = "X3",keep.extra.columns = TRUE)
DEG_GR1$labels <- DEG_GR1$description
Chr_sizes_GR <- toGRanges("~/Desktop/Sinv_Analyses/lg_size.bed")
gene_bed_GR <- toGRanges("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/NCBI_lgonly_gene_v4.bed")
supergene_GR <- GRanges(seqnames = "lg16",ranges=IRanges(7311525,18123987)) ## Need to check points!!!
pp <- getDefaultPlotParams(plot.type = 4)
pp$data1inmargin <- 5
pp$rightmargin <- 0.2
kp <- plotKaryotype(genome = Chr_sizes_GR, plot.type=4, ideogram.plotter = NULL, labels.plotter = NULL,plot.params = pp)
kpAddCytobandsAsLine(kp,lwd = 30)
kpAddChromosomeNames(kp, srt=90)
core_bb_windows <- generate_heatmap_frame(row.names(glm.QLF.BBvbb_TT_sig))
core_Bb_windows <- generate_heatmap_frame(row.names(glm.QLF.BBvBb_TT_sig))
Brain_Bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Brain_BB_v_Bb.TT.sig))
Brain_bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Brain_BB_v_bb.TT.sig))
Ovary_Bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Ovary_BB_v_Bb.TT.sig))
Ovary_bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Ovary_BB_v_bb.TT.sig))
## Add DEG density Heatmap
colfunc <- colorRampPalette(c("white", "blue"))
kpHeatmap(kp,data=core_bb_windows,r0=0.11,r1=0.15,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=core_Bb_windows,r0=0.16,r1=0.2,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Brain_bb_windows,r0=0.21,r1=0.25,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Brain_Bb_windows,r0=0.26,r1=0.3,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Ovary_bb_windows,r0=0.31,r1=0.35,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Ovary_Bb_windows,r0=0.36,r1=0.4,colors = colfunc(5),ymin=0,ymax=20)
## Add supergene Marker
kpPlotRegions(kp,data = supergene_GR,col=adjustcolor("darkgreen",alpha.f = 0.5), r0=0.05,r1=0.1)
kpPlotMarkers(kp, DEG_GR1,labels = DEG_GR1$description,r0 = 0.41,r1=0.5,ignore.chromosome.ends = TRUE,max.iter = 10000,label.dist = -0.001,clipping = TRUE,marker.parts = c(0.1,0.8,0.1))
## Determine if there is a statistical overrepresentation of DEGs within the supergene and if it is directional.
## Check for overlap in supergene and DEGs for each pairwise comparison
check_supergene_presence_bias(glm.QLF.BBvBb_TT$table)
## [1] 17 354 17 8139
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-squared
## approximation may be incorrect
## [,1] [,2]
## [1,] 17 17
## [2,] 354 8139
## [,1] [,2]
## [1,] 1.479301 32.5207
## [2,] 369.520699 8123.4793
## [1] 4.633262e-39
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 5.933e-15
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 10.91879 48.26582
## sample estimates:
## odds ratio
## 22.96189
## NULL
check_supergene_presence_bias(glm.QLF.BBvbb_TT$table)
## [1] 46 325 50 8106
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-squared
## approximation may be incorrect
## [,1] [,2]
## [1,] 46 50
## [2,] 325 8106
## [,1] [,2]
## [1,] 4.17685 91.82315
## [2,] 366.82315 8064.17685
## [1] 2.643213e-98
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 14.77568 35.49537
## sample estimates:
## odds ratio
## 22.91676
## NULL
check_supergene_presence_bias(pw.qlf.Brain_BB_v_Bb.TT$table)
## [1] 14 357 14 8142
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-squared
## approximation may be incorrect
## [,1] [,2]
## [1,] 14 14
## [2,] 357 8142
## [,1] [,2]
## [1,] 1.218248 26.78175
## [2,] 369.781752 8129.21825
## [1] 1.902539e-32
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 1.576e-12
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 9.984269 51.955997
## sample estimates:
## odds ratio
## 22.77735
## NULL
check_supergene_presence_bias(pw.qlf.Brain_BB_v_bb.TT$table)
## [1] 38 333 58 8098
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-squared
## approximation may be incorrect
## [,1] [,2]
## [1,] 38 58
## [2,] 333 8098
## [,1] [,2]
## [1,] 4.17685 91.82315
## [2,] 366.82315 8064.17685
## [1] 6.043095e-65
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 10.12979 24.77462
## sample estimates:
## odds ratio
## 15.91616
## NULL
check_supergene_presence_bias(pw.qlf.Ovary_BB_v_Bb.TT$table)
## [1] 8 363 16 8140
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-squared
## approximation may be incorrect
## [,1] [,2]
## [1,] 8 16
## [2,] 363 8140
## [,1] [,2]
## [1,] 1.044213 22.95579
## [2,] 369.955787 8133.04421
## [1] 3.172793e-12
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 4.744e-06
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 4.122905 27.963466
## sample estimates:
## odds ratio
## 11.20021
## NULL
check_supergene_presence_bias(pw.qlf.Ovary_BB_v_bb.TT$table)
## [1] 40 331 117 8039
## [1] -2555
## [,1] [,2]
## [1,] 40 117
## [2,] 331 8039
## [,1] [,2]
## [1,] 6.83089 150.1691
## [2,] 364.16911 8005.8309
## [1] 3.400547e-39
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 5.549275 12.199496
## sample estimates:
## odds ratio
## 8.298817
## NULL
## Generate adjusted logFC versions of the results tables so pw and glm tables are comparable.
glm.QLF.BBvBb_adj <- glm.QLF.BBvBb_TT_sig
glm.QLF.BBvBb_adj$logFC <- -1*glm.QLF.BBvBb_TT_sig$logFC
glm.QLF.BBvbb_adj <- glm.QLF.BBvbb_TT_sig
glm.QLF.BBvbb_adj$logFC <- -1*glm.QLF.BBvbb_TT_sig$logFC
check_supergene_dir_bias(glm.QLF.BBvBb_adj)
## [1] 1 16 2 15
## [1] -16
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-squared
## approximation may be incorrect
## [,1] [,2]
## [1,] 1 2
## [2,] 16 15
## [,1] [,2]
## [1,] 1.5 1.5
## [2,] 15.5 15.5
## [1] 0.5454172
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.007491873 10.104993853
## sample estimates:
## odds ratio
## 0.4790176
## NULL
check_supergene_dir_bias(glm.QLF.BBvbb_adj)
## [1] 18 28 22 28
## [1] -57
## [,1] [,2]
## [1,] 18 22
## [2,] 28 28
## [,1] [,2]
## [1,] 19.16667 20.83333
## [2,] 26.83333 29.16667
## [1] 0.6287651
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 0.6818
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.3345486 1.9934956
## sample estimates:
## odds ratio
## 0.8198997
## NULL
check_supergene_dir_bias(pw.qlf.Brain_BB_v_Bb.TT.sig)
## [1] 1 13 4 10
## [1] -14
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-squared
## approximation may be incorrect
## [,1] [,2]
## [1,] 1 4
## [2,] 13 10
## [,1] [,2]
## [1,] 2.5 2.5
## [2,] 11.5 11.5
## [1] 0.1387917
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 0.3259
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.003637868 2.484508917
## sample estimates:
## odds ratio
## 0.2033741
## NULL
check_supergene_dir_bias(pw.qlf.Brain_BB_v_bb.TT.sig)
## [1] 17 21 38 20
## [1] -48
## [,1] [,2]
## [1,] 17 38
## [2,] 21 20
## [,1] [,2]
## [1,] 21.77083 33.22917
## [2,] 16.22917 24.77083
## [1] 0.04412524
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 0.05811
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.1693863 1.0681989
## sample estimates:
## odds ratio
## 0.4300187
## NULL
check_supergene_dir_bias(pw.qlf.Ovary_BB_v_Bb.TT.sig)
## [1] 1 7 0 16
## [1] -19
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-squared
## approximation may be incorrect
## [,1] [,2]
## [1,] 1 0
## [2,] 7 16
## [,1] [,2]
## [1,] 0.3333333 0.6666667
## [2,] 7.6666667 15.3333333
## [1] 0.1485618
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 0.3333
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.05128188 Inf
## sample estimates:
## odds ratio
## Inf
## NULL
check_supergene_dir_bias(pw.qlf.Ovary_BB_v_bb.TT.sig)
## [1] 19 21 17 100
## [1] -83
## [,1] [,2]
## [1,] 19 17
## [2,] 21 100
## [,1] [,2]
## [1,] 9.171975 26.82803
## [2,] 30.828025 90.17197
## [1] 1.852027e-05
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 5.337e-05
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 2.192937 12.846506
## sample estimates:
## odds ratio
## 5.250997
## NULL
## Take the results from above and visualize it as a stacked bar plot (Figure S2)
DE_dir_CT <- read.delim("~/Desktop/Sinv_Analyses/DE_dir_CT3.tsv")
DE_dir_CT$Comparison_f = factor(DE_dir_CT$Comparison, levels=c('Full BB vs. Bb','Brain BB vs. Bb','Ovary BB vs. Bb','Full BB vs. bb','Brain BB vs. bb','Ovary BB vs. bb'))
ggplot(data=DE_dir_CT, aes(x=Genome.Position, y=DEG_Ratio, fill=DE.Direction)) +
geom_bar(stat="identity") +
facet_grid(~Comparison_f) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Load SNP-level files in the individual GATK ASE Read Counter files (for all 16 Bb libraries)
setwd("~/Desktop/Sinv_Analyses/GATK_ASERC_mod/")
temp = list.files(pattern="*counts.csv")
temp2 <- sapply(strsplit(temp, split='_', fixed=TRUE), function(x) (x[1]))
myfiles = lapply(temp, read.delim)
# Note: Each File is saved as myfiles[[x]] as a data.frame
## merge first two columns into identifier column
for (i in 1:16){
myfiles[[i]]$ID <- paste(myfiles[[i]]$contig,myfiles[[i]]$position,myfiles[[i]]$altAllele,sep = "~")
colnames(myfiles[[i]]) <- paste(colnames(myfiles[[i]]), temp2[i], sep = ".")
colnames(myfiles[[i]])[14] <- "ID"
}
## merge all data.frames by ID
merged <- Reduce(function(...) merge(..., by='ID', all.x=FALSE), myfiles)
## Load gene-level files (made using bedtools intersect) in the individual GATK ASE Read Counter files (for all 16 Bb libraries)
temp_gene = list.files(pattern="*genes.tsv")
temp2_gene <- sapply(strsplit(temp_gene, split='_', fixed=TRUE), function(x) (x[1]))
myfiles_gene = lapply(temp_gene, read.delim)
# Note: Each File is saved as myfiles[[x]] as a data.frame
## merge first two columns into identifier column
for (i in 1:16){
colnames(myfiles_gene[[i]]) <- paste(colnames(myfiles_gene[[i]]), temp2_gene[i], sep = ".")
colnames(myfiles_gene[[i]])[1] <- "gene"
}
## merge all data.frames by ID
merged_genes <- Reduce(function(...) merge(..., by='gene', all.x=FALSE), myfiles_gene)
pull_cols_genes <- grepl("refCount|altCount",colnames(merged_genes))
row.names(merged_genes) <- merged_genes$gene
x_genes <- merged_genes[,pull_cols_genes]
## Extract the specific columns that I want (refCount and altCount)
pull_cols <- grepl("refCount|altCount",colnames(merged))
row.names(merged) <- merged$ID
x <- merged[,pull_cols]
## Load in model matrix (Modified to analyze ASE)
adv_group_ASE <- read.delim("/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_model_matrix6.tsv",sep = "\t")
head(adv_group_ASE)
## Sample Individual_ID Colony_ID Tissue Social.Form Genotype Alt_ID
## 1 104AB 104A 104 Brain Polygyne Bb Brain.Polygyne.Bb
## 2 104AB 104A 104 Brain Polygyne Bb Brain.Polygyne.Bb
## 3 104AO 104A 104 Ovary Polygyne Bb Ovary.Polygyne.Bb
## 4 104AO 104A 104 Ovary Polygyne Bb Ovary.Polygyne.Bb
## 5 107AB 107A 107 Brain Polygyne Bb Brain.Polygyne.Bb
## 6 107AB 107A 107 Brain Polygyne Bb Brain.Polygyne.Bb
## Allele
## 1 Reference
## 2 Alternative
## 3 Reference
## 4 Alternative
## 5 Reference
## 6 Alternative
Colony_ID <- factor(adv_group_ASE$Colony_ID)
Tissue <- factor(adv_group_ASE$Tissue)
Allele <- factor(adv_group_ASE$Allele)
## Describe the Experiment Design and define Contrasts
ASE_design <- model.matrix(~0+Tissue*Allele,data=adv_group_ASE)
y_ASE <- DGEList(x,samples=ASE_design)
y_ASE <- estimateDisp(y_ASE,ASE_design)
y_ASE <- estimateGLMCommonDisp(y_ASE, ASE_design)
y_ASE <- estimateGLMTrendedDisp(y_ASE, ASE_design)
y_ASE <- estimateGLMTagwiseDisp(y_ASE, ASE_design)
## QLM Fit and DE analysis
fit_ASE <- glmQLFit(y_ASE,ASE_design)
## DE Analysis
QLF.ASE <- glmQLFTest(fit_ASE,coef="AlleleReference")
QLF.ASE_TT <- topTags(QLF.ASE,n=Inf)
QLF.ASE_TT_sig <- topTags(QLF.ASE_TT,p.value = 0.01,n=Inf)$table
QLF.ASE_TT_sig$SNPs <- row.names(QLF.ASE_TT_sig)
make_volcano(QLF.ASE,"SB vs. Sb in SB/Sb individuals")
## [1] 79
## [1] 14
## [1] 6
## [1] 61
## Warning: Removed 1 rows containing missing values (geom_point).
## Note: Positive values in volcano plot indicate higher expression in reference.
## Describe the Experiment Design and define Contrasts
y_gene<- DGEList(x_genes,samples=ASE_design)
y_gene <- estimateDisp(y_gene,ASE_design)
y_gene <- estimateGLMCommonDisp(y_gene, ASE_design)
y_gene <- estimateGLMTrendedDisp(y_gene, ASE_design)
y_gene <- estimateGLMTagwiseDisp(y_gene, ASE_design)
## QLM Fit and DE analysis
fit_gene <- glmQLFit(y_gene,ASE_design)
## DE Analysis
QLF.ASE.gene <- glmQLFTest(fit_gene,coef="AlleleReference")
QLF.ASE.gene_TT <- topTags(QLF.ASE.gene,n=Inf)
QLF.ASE.gene_TT_sig <- topTags(QLF.ASE.gene_TT,p.value = 0.01,n=Inf)$table
QLF.ASE.gene_TT_all <- topTags(QLF.ASE.gene_TT,p.value = 1,n=Inf)$table
## Generate ASE Gene Volcano plot (Figure 4A)
make_volcano(QLF.ASE.gene,"Alternatively Spliced Genes")
## [1] 10
## [1] 6
## [1] 4
## [1] 9
## Note: Positive values in volcano plot indicate higher expression in reference.
## Break the topTags file into a better data.frame
QLF.ASE_TT_df <- QLF.ASE_TT$table
QLF.ASE_TT_df$chr <- sapply(strsplit(row.names(QLF.ASE_TT_df), split='~', fixed=TRUE), function(x) (x[1]))
QLF.ASE_TT_df$start <- sapply(strsplit(row.names(QLF.ASE_TT_df), split='~', fixed=TRUE), function(x) (x[2]))
QLF.ASE_TT_df$end <- sapply(strsplit(row.names(QLF.ASE_TT_df), split='~', fixed=TRUE), function(x) (x[2]))
## merge with linkage map.
genetic_map <- read.delim("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/genetic_map.txt", comment.char="#")
head (genetic_map)
## scaffold start end orientation lg lg_position partial
## 1 NW_011798938.1 1 955721 uncertain lg1 1 FALSE
## 2 NW_011794913.1 1 1181834 + lg1 955822 FALSE
## 3 NW_011795398.1 1 1293734 - lg1 2137756 FALSE
## 4 NW_011801858.1 1 1483336 - lg1 3431590 FALSE
## 5 NW_011798146.1 1 451599 - lg1 4915026 FALSE
## 6 NW_011800891.1 1 3265217 + lg1 5366725 FALSE
## Map the SNPs (based on gnG genome) to the linkage map.
QLF.ASE_TT_df_merge <- merge(QLF.ASE_TT_df,genetic_map,by.x="chr",by.y="scaffold")
for (i in 1:nrow(QLF.ASE_TT_df_merge)){
if (QLF.ASE_TT_df_merge$orientation[i]=="-"){
QLF.ASE_TT_df_merge$newstart[i] <- QLF.ASE_TT_df_merge$lg_position[i] + (QLF.ASE_TT_df_merge$end.y[i]-as.numeric(QLF.ASE_TT_df_merge$start.x[i]))
} else {
QLF.ASE_TT_df_merge$newstart[i] <- QLF.ASE_TT_df_merge$lg_position[i] + as.numeric(QLF.ASE_TT_df_merge$start.x[i]) - as.numeric(QLF.ASE_TT_df_merge$start.y[i])
}
}
QLF.ASE_TT_df_merge$newend <- QLF.ASE_TT_df_merge$newstart
QLF.ASE_TT_df_merge <- QLF.ASE_TT_df_merge[(QLF.ASE_TT_df_merge$FDR<=0.01),]
## Add in LOC IDs for the ASE gene frames
QLF.ASE.gene_TT_sig_LOC <- convert_LOC(QLF.ASE.gene_TT_sig)
QLF.ASE.gene_TT_all_LOC <- convert_LOC(QLF.ASE.gene_TT_all)
## Make ASE DEG Upset Plot (Figure 4B)
##Filter Gene name lists down by which genes were analyzed in ASE and DE
ASE_background <- QLF.ASE.gene_TT_all_LOC$Row.names
Upset_backgroud <- intersect(Background.genes,ASE_background)
ASE_genes_b <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$FDR <= 0.01)&(QLF.ASE.gene_TT_all_LOC$logFC<0),c("Row.names")]
ASE_genes_B <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$FDR <= 0.01)&(QLF.ASE.gene_TT_all_LOC$logFC>0),c("Row.names")]
DE_genes_b <- row.names(glm.QLF.BBvBb_TT_sig[(glm.QLF.BBvBb_TT_sig$logFC>0),])
DE_genes_B <- row.names(glm.QLF.BBvBb_TT_sig[(glm.QLF.BBvBb_TT_sig$logFC<0),])
ASE_genes_b <- intersect(ASE_genes_b,Upset_backgroud)
ASE_genes_B <- intersect(ASE_genes_B,Upset_backgroud)
DE_genes_b <- intersect(DE_genes_b,Upset_backgroud)
DE_genes_B <- intersect(DE_genes_B,Upset_backgroud)
Overlap <- list(ASE_b=ASE_genes_b,
ASE_B=ASE_genes_B,
DE_SBSb=DE_genes_b,
DE_SBSB=DE_genes_B
)
upset(fromList(Overlap),nsets=4,order.by = "freq",text.scale = 1.4)
## Extract overlap information
b_overlap <- ASE_genes_b[(ASE_genes_b %in% DE_genes_b)]
b_overlap_data <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% b_overlap),]
B_overlap <- ASE_genes_B[ASE_genes_B %in% DE_genes_B]
QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% B_overlap),]
## Row.names logFC logCPM F PValue FDR Name
## 61 gene11836 1.152001 11.61619 47.65609 7.660374e-08 2.429433e-06 LOC105203076
## description
## 61 nucleolar protein 16 [Source:RefSeq mRNA;Acc:XM_011171831.1]
nonB_overlap <- ASE_genes_B[!(ASE_genes_B %in% DE_genes_B)]
dosage_comp_B <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% nonB_overlap),]
nonb_overlap <- ASE_genes_b[!(ASE_genes_b %in% DE_genes_b)]
dosage_comp_b <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% nonb_overlap),]
## Set some plotting parameters for the karyoploteR
plot.params <- getDefaultPlotParams(plot.type = 1)
plot.params$data1height=300
plot.params$data1outmargin = 20
plot.params$data1inmargin = 5
plot.params$ideogramheight = 10
col.over <- "#FFBD07AA" #Yellow
col.under <- "#00A6EDAA" #Blue
col.insig <- "#7F7F7F"
## Begin Creating the tracks for Figure 5
kp <- plotKaryotype(genome = Chr_sizes_GR, cytobands = gene_bed_GR,chromosomes = c("lg16"),plot.params = plot.params)
## Warning in ideogram.plotter(kp, ...): No 'gieStain' column found in cytobands.
## Using 'gpos50' (gray) for all of them
kpAddBaseNumbers(kp,tick.dist = 1000000,add.units = TRUE)
## Add Supergene
kpPlotRegions(kp,data = supergene_GR,col="darkgreen", r0=0,r1=0.01)
## Add in various inverions from Huang et al. (from 0 to 0.1)
gr <- GRanges(seqnames = "lg16", strand = c("+", "+", "+"),
ranges = IRanges(start = c(7948943,8793193,18097796), end = c(8793193,18097796,18098299)))
values(gr) <- DataFrame(labels = c("Inversion A","Inversion B","Inversion C"))
kpPlotRegions(kp,data = gr[1],col="purple", r0=0.02,r1=0.03)
kpPlotRegions(kp,data = gr[2],col="orange", r0=0.02,r1=0.03)
kpPlotRegions(kp,data = gr[3],col="red", r0=0.02,r1=0.03)
## Plot Gene density across Lg16
kpPlotDensity(kp, gene_bed_GR, window.size = 500000, col="#ddaacc",r0 = 0.04,r1 = 0.09)
## Core BBvbb
QLF.BBvbb_adj.merge2 <- merge(glm.QLF.BBvbb_TT_sig,Sinv_annot_lg,by.x=0,by.y="X4")
QLF.BBvbb_adj.merge2$logFC <- -1*QLF.BBvbb_adj.merge2$logFC
QLF.BBvbb_adj.merge <- QLF.BBvbb_adj.merge2[QLF.BBvbb_adj.merge2$X1=="lg16",]
DEG_GR <- makeGRangesFromDataFrame(QLF.BBvbb_adj.merge, seqnames.field = "X1",start.field = "X2",end.field = "X3",keep.extra.columns = TRUE)
fc.ymax=ceiling(max(abs(DEG_GR$logFC)))
fc.ymin=-fc.ymax
cex.val <- -log10(DEG_GR$FDR)/15 + 0.7
sign.col <- rep(col.insig, length(DEG_GR))
sign.col[(DEG_GR$logFC<0)&(DEG_GR$FDR<=0.01)] <- col.under
sign.col[(DEG_GR$logFC>0)&(DEG_GR$FDR<=0.01)] <- col.over
kpPoints(kp, data=DEG_GR, y=DEG_GR$logFC, ymax=fc.ymax, ymin=fc.ymin,cex=cex.val, col=sign.col,r0 = 0.14, r1 = 0.29)
kpAxis(kp, ymax=fc.ymax, ymin=fc.ymin,side = 2,cex=0.5,r0 = 0.14, r1 = 0.29)
DEG_density_B <- generate_heatmap_frame(QLF.BBvbb_adj.merge[QLF.BBvbb_adj.merge$logFC>0,]$Row.names)
DEG_density_b <- generate_heatmap_frame(QLF.BBvbb_adj.merge[QLF.BBvbb_adj.merge$logFC<0,]$Row.names)
colfunc <- colorRampPalette(c("lightcyan1", "red"))
kpHeatmap(kp,data=DEG_density_B,r0=0.3,r1=0.33,colors = colfunc(5))
kpHeatmap(kp,data=DEG_density_b,r0=0.1+0,r1=0.13,colors = colfunc(5))
## Core BBvBb
QLF.BBvBb_adj.merge2 <- merge(glm.QLF.BBvBb_TT_sig,Sinv_annot_lg,by.x=0,by.y="X4")
QLF.BBvBb_adj.merge <- QLF.BBvBb_adj.merge2[QLF.BBvBb_adj.merge2$X1=="lg16",]
DEG_GR <- makeGRangesFromDataFrame(QLF.BBvBb_adj.merge, seqnames.field = "X1",start.field = "X2",end.field = "X3",keep.extra.columns = TRUE)
fc.ymax=ceiling(max(abs(DEG_GR$logFC)))
fc.ymin=-fc.ymax
cex.val <- -log10(DEG_GR$FDR)/15 + 0.7
sign.col <- rep(col.insig, length(DEG_GR))
sign.col[(DEG_GR$logFC<0)&(DEG_GR$FDR<=0.01)] <- col.under
sign.col[(DEG_GR$logFC>0)&(DEG_GR$FDR<=0.01)] <- col.over
kpPoints(kp, data=DEG_GR, y=DEG_GR$logFC, ymax=fc.ymax, ymin=fc.ymin,cex=cex.val, col=sign.col,r0 = 0.39, r1 = 0.54)
kpAxis(kp, ymax=fc.ymax, ymin=fc.ymin,side = 2,cex=0.5,r0 = 0.38, r1 = 0.54)
DEG_density_B <- generate_heatmap_frame(QLF.BBvBb_adj.merge[QLF.BBvBb_adj.merge$logFC>0,]$Row.names)
DEG_density_b <- generate_heatmap_frame(QLF.BBvBb_adj.merge[QLF.BBvBb_adj.merge$logFC<0,]$Row.names)
colfunc <- colorRampPalette(c("lightcyan1", "red"))
kpHeatmap(kp,data=DEG_density_b,r0=0.35,r1=0.38,colors = colfunc(5))
kpHeatmap(kp,data=DEG_density_B,r0=0.55,r1=0.58,colors = colfunc(5))
## Add in ASE Heatmap and B vs. b dots 0.6-0.8
ASE_GR <- makeGRangesFromDataFrame(QLF.ASE_TT_df_merge, seqnames.field = "lg",start.field = "newstart",end.field = "newend",keep.extra.columns = TRUE)
fc.ymax=ceiling(max(abs(ASE_GR$logFC)))
fc.ymin=-fc.ymax
cex.val <- -log10(ASE_GR$FDR)/15 + 0.7
sign.col <- rep(col.insig, length(ASE_GR))
col.over2 <- rgb(125, 125, 0, max = 255, alpha = 100) #Yellow
col.under2 <- rgb(0, 0, 255, max = 255, alpha = 100) #Blue
sign.col[(ASE_GR$logFC<0)&(ASE_GR$FDR<=0.01)] <- col.under2
sign.col[(ASE_GR$logFC>0)&(ASE_GR$FDR<=0.01)] <- col.over2
kpPoints(kp, data=ASE_GR, y=ASE_GR$logFC, ymax=fc.ymax, ymin=fc.ymin,cex=cex.val, col=sign.col,r0 = 0.64, r1 = 0.79)
kpAxis(kp, ymax=fc.ymax, ymin=fc.ymin,side = 2,cex=0.5,r0 = 0.63, r1 = 0.79)
SNP_density_B <- generate_heatmap_frame_SNP(QLF.ASE_TT_df_merge[(QLF.ASE_TT_df_merge$logFC>0),])
SNP_density_b <- generate_heatmap_frame_SNP(QLF.ASE_TT_df_merge[(QLF.ASE_TT_df_merge$logFC<0),])
colfunc <- colorRampPalette(c("lightcyan1", "red"))
kpHeatmap(kp,data=SNP_density_B,r0=0.80,r1=0.83,colors = colfunc(5))
kpHeatmap(kp,data=SNP_density_b,r0=0.60,r1=0.63,colors = colfunc(5))
## It is worth mentioning here that a few of the genes that are differentially expressed are not actually counted in this location analysis because they are located on the cusp of the linkage mapping information.Consequently, it was decided to exclude them from this analysis as their position cannot be confidently decided.
## Perform Run's Test on DEGs, Testing for non-uniformity in significance
perform_runs_test_sig(glm.QLF.BBvBb_TT)
##
## Runs Test
##
## data: supergene_order$FDR
## statistic = -1.4708, runs = 31, n1 = 354, n2 = 17, n = 371, p-value =
## 0.1414
## alternative hypothesis: nonrandomness
perform_runs_test_sig(glm.QLF.BBvbb_TT)
##
## Runs Test
##
## data: supergene_order$FDR
## statistic = -0.14242, runs = 81, n1 = 325, n2 = 46, n = 371, p-value =
## 0.8868
## alternative hypothesis: nonrandomness
## Perform Run's Test on DEGs, Testing for non-uniformity in directionality of significant DEGs
perform_runs_test_dir(glm.QLF.BBvBb_TT)
##
## Runs Test
##
## data: supergene_order_sig$logFC
## statistic = 0.36515, runs = 3, n1 = 16, n2 = 1, n = 17, p-value = 0.715
## alternative hypothesis: nonrandomness
perform_runs_test_dir(glm.QLF.BBvbb_TT)
##
## Runs Test
##
## data: supergene_order_sig$logFC
## statistic = -1.5396, runs = 18, n1 = 28, n2 = 18, n = 46, p-value =
## 0.1237
## alternative hypothesis: nonrandomness
## Perform Run's Test on DEGs, Testing for non-uniformity in directionality of significant DEGs in specific tissues
perform_runs_test_dir(pw.qlf.Brain_BB_v_Bb.TT)
##
## Runs Test
##
## data: supergene_order_sig$logFC
## statistic = 0.40825, runs = 3, n1 = 1, n2 = 13, n = 14, p-value =
## 0.6831
## alternative hypothesis: nonrandomness
perform_runs_test_dir(pw.qlf.Ovary_BB_v_Bb.TT)
##
## Runs Test
##
## data: supergene_order_sig$logFC
## statistic = 0.57735, runs = 3, n1 = 1, n2 = 7, n = 8, p-value = 0.5637
## alternative hypothesis: nonrandomness
perform_runs_test_dir(pw.qlf.Brain_BB_v_bb.TT)
##
## Runs Test
##
## data: supergene_order_sig$logFC
## statistic = -1.2608, runs = 16, n1 = 17, n2 = 21, n = 38, p-value =
## 0.2074
## alternative hypothesis: nonrandomness
perform_runs_test_dir(pw.qlf.Ovary_BB_v_bb.TT)
##
## Runs Test
##
## data: supergene_order_sig$logFC
## statistic = -0.30513, runs = 20, n1 = 19, n2 = 21, n = 40, p-value =
## 0.7603
## alternative hypothesis: nonrandomness
## Perform Run's Test on ASE genes for significance and directionality
perform_runs_test_sig(QLF.ASE.gene_TT)
##
## Runs Test
##
## data: supergene_order$FDR
## statistic = 1.6682, runs = 57, n1 = 192, n2 = 29, n = 221, p-value =
## 0.09527
## alternative hypothesis: nonrandomness
perform_runs_test_dir(QLF.ASE.gene_TT)
##
## Runs Test
##
## data: supergene_order_sig$logFC
## statistic = -0.13188, runs = 15, n1 = 13, n2 = 16, n = 29, p-value =
## 0.8951
## alternative hypothesis: nonrandomness
## Runs ASE by SNP
ASE_logFC <- QLF.ASE_TT_df_merge[order(QLF.ASE_TT_df_merge$newend),c("logFC")]
runs.test(ASE_logFC,plot=T,threshold = 0)
##
## Runs Test
##
## data: ASE_logFC
## statistic = -5.5217, runs = 45, n1 = 67, n2 = 93, n = 160, p-value =
## 3.356e-08
## alternative hypothesis: nonrandomness
## Merge with linkage mapped annotation to determine supergene vs. nonsupergene genes
glm.QLF.all_TT_sig_merge <- merge(glm.QLF.all_TT_sig,Sinv_annot_lg,by.x=0,by.y="X4",all.x=TRUE)
supergene_genes <- glm.QLF.all_TT_sig_merge[((glm.QLF.all_TT_sig_merge$X1=="lg16")&(glm.QLF.all_TT_sig_merge$X2>=7311525)&(glm.QLF.all_TT_sig_merge$X3<=18123987)),c("Row.names")]
supergene_genes <- supergene_genes[!is.na(supergene_genes)]
nonsuper_genes <- glm.QLF.all_TT_sig_merge[((grepl(pattern = "lg",x = glm.QLF.all_TT_sig_merge$X1))&(glm.QLF.all_TT_sig_merge$Row.names %!in% supergene_genes)),c("Row.names")]
## Generate Polar Plot with only the genes in the supergene (Figure S3,B,C)
NCBI_supergene_polar <- make_polar_DEG(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% supergene_genes),],15)
NCBI_supergene_polar
## Warning: Removed 1 rows containing missing values (geom_point).
NCBI_nonsupergene_polar <- make_polar_DEG(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% nonsuper_genes),],10)
NCBI_nonsupergene_polar
## Extract the angles of expression using an arctangent and then compare the radial distributions using a watson's two test
nonsuper_angle <- atan2(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% nonsuper_genes),c("logFC.GenotypeBb")],glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% nonsuper_genes),c("logFC.Genotypebb")])
supergene_angle <- atan2(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% supergene_genes),c("logFC.GenotypeBb")],glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% supergene_genes),c("logFC.Genotypebb")])
watson.two(nonsuper_angle, supergene_angle, alpha=0, plot=TRUE)
##
## Watson's Two-Sample Test of Homogeneity
##
## Test Statistic: 0.2196
## 0.01 < P-value < 0.05
##
## Generate Dominance angle data.frame
dominance_angle <- data.frame(gene=row.names(glm.QLF.all_TT$table),angle=atan2(glm.QLF.all_TT$table$logFC.GenotypeBb,glm.QLF.all_TT$table$logFC.Genotypebb))
row.names(dominance_angle) <- dominance_angle$gene
## Extract the results from the necessary data.frames and rename columns
Brain_Bb <- pw.qlf.Brain_BB_v_Bb.TT$table[,c(1,2,5)]
colnames(Brain_Bb) <- c("Brain_Bb_logFC","Brain_Bb_logCPM","Brain_Bb_FDR")
Brain_bb <- pw.qlf.Brain_BB_v_bb.TT$table[,c(1,2,5)]
colnames(Brain_bb) <- c("Brain_bb_logFC","Brain_bb_logCPM","Brain_bb_FDR")
Ovary_Bb <- pw.qlf.Ovary_BB_v_Bb.TT$table[,c(1,2,5)]
colnames(Ovary_Bb) <- c("Ovary_Bb_logFC","Ovary_Bb_logCPM","Ovary_Bb_FDR")
Ovary_bb <- pw.qlf.Ovary_BB_v_bb.TT$table[,c(1,2,5)]
colnames(Ovary_bb) <- c("Ovary_bb_logFC","Ovary_bb_logCPM","Ovary_bb_FDR")
Multifactorial_Bb <- glm.QLF.BBvBb_TT$table[,c(1,2,5)]
colnames(Multifactorial_Bb) <- c("Multifactorial_Bb_logFC","Multifactorial_Bb_logCPM","Multifactorial_Bb_FDR")
Multifactorial_bb <- glm.QLF.BBvbb_TT$table[,c(1,2,5)]
colnames(Multifactorial_bb) <- c("Multifactorial_bb_logFC","Multifactorial_bb_logCPM","Multifactorial_bb_FDR")
ASE_gene <- QLF.ASE.gene_TT_all[,c(1,2,5)]
colnames(ASE_gene) <- c("ASE_logFC","ASE_logCPM","ASE_FDR")
Sinv_annot_formerge <- data.frame(Sinv_annot_lg)
row.names(Sinv_annot_formerge) <- Sinv_annot_formerge$X4
colnames(Sinv_annot_formerge) <- c("chr","start","end","gene")
Sinv_annot_formerge <- Sinv_annot_formerge[,1:3]
Si_gnG_refseq <- readGFF(file="/Users/samarsenault/Desktop/Sinv_Analyses/NCBI_genome/GCF_000188075.1_Si_gnG/GCF_000188075.1_Si_gnG_genomic.gff")
refseq_LOC <- data.frame(Si_gnG_refseq[(Si_gnG_refseq$type=="gene")|(Si_gnG_refseq$type=="pseudogene"),c("ID","Name")])
refseq_desc_old <- data.frame(Si_gnG_refseq[(Si_gnG_refseq$type=="mRNA"),c("gene","product")])
refseq_desc = refseq_desc_old[order(refseq_desc_old[,'gene']),]
refseq_desc = refseq_desc[!duplicated(refseq_desc$gene),]
## Remove Redundent rows (genes with multiple transcripts in the annotation)
refseq_merge <- merge(refseq_LOC,refseq_desc,by.x="Name",by.y="gene",all.x=TRUE)
refseq_merge <- distinct(refseq_merge)
row.names(refseq_merge) <- refseq_merge$ID
refseq_merge <- refseq_merge[,c(1,3)]
colnames(refseq_merge) <- c("LOC_ID","Description")
temp1 <- merge(Sinv_annot_formerge,refseq_merge,by=0,all=T)
temp1 <- merge(temp1,Brain_Bb,by.x="Row.names",by.y=0,all=T)
temp1 <- merge(temp1,Brain_bb,by.x="Row.names",by.y=0,all=T)
temp1 <- merge(temp1,Ovary_Bb,by.x="Row.names",by.y=0,all=T)
temp1 <- merge(temp1,Ovary_bb,by.x="Row.names",by.y=0,all=T)
temp1 <- merge(temp1,Multifactorial_Bb,by.x="Row.names",by.y=0,all=T)
temp1 <- merge(temp1,Multifactorial_bb,by.x="Row.names",by.y=0,all=T)
temp1 <- merge(temp1,ASE_gene,by.x="Row.names",by.y=0,all=T)
All_Data_merge <- merge(temp1,dominance_angle,by.x="Row.names",by.y=0,all=T)
write.table(All_Data_merge,file = "~/Desktop/Sinv_Analyses/All_Data_merge_new.tsv",quote = F,sep = "\t",row.names = F)
## Extract dosage compensation candidate genes
b_up_DosComp <- All_Data_merge[((All_Data_merge$Multifactorial_Bb_FDR>0.01)&(All_Data_merge$ASE_logFC<0)&(All_Data_merge$ASE_FDR<=0.01)&!is.na(All_Data_merge$ASE_FDR)),]
B_up_DosComp <- All_Data_merge[((All_Data_merge$Multifactorial_Bb_FDR>0.01)&(All_Data_merge$ASE_logFC>0)&(All_Data_merge$ASE_FDR<=0.01)&!is.na(All_Data_merge$ASE_FDR)),]
## Import the OBP-aligned RNA-seq Data
x1_OBP <- read_delim("~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all",
"\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
## .default = col_double(),
## GeneID = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 52 parsing failures.
## row col expected actual file
## 1 -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## 2 -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## 3 -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## 4 -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## 5 -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## ... ... .......... .......... ........................................................
## See problems(...) for more details.
x_OBP <- data.frame(x1_OBP[1:52,2:64],row.names = x1_OBP$GeneID[1:52])
## Generate data.frame for pairwise OBP analyses
y_OBP <- DGEList(x_OBP,group = adv_group$Alt_ID)
keep_OBP <- rowSums(cpm(y_OBP)>10/9) >= 8 ## NEED TO EDIT!!!
y_OBP <- y_OBP[keep_OBP, , keep.lib.sizes=FALSE]
## Normalize samples by library size
y_OBP <- calcNormFactors(y_OBP)
obp.Background.genes <- row.names(y_OBP$counts)
## Begin Computing Differential Expression
y_OBP <- estimateDisp(y_OBP,design_pw)
y_OBP <- estimateGLMCommonDisp(y_OBP, design_pw)
y_OBP <- estimateGLMTrendedDisp(y_OBP, design_pw)
y_OBP <- estimateGLMTagwiseDisp(y_OBP, design_pw)
## QLM Fit and DE analysis
## Note: For more conservative estimates, use glmQLFTest vs. glmLRT
fit_OBP <- glmQLFit(y_OBP,design_pw)
obp.qlf.Brain_BB_v_Bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Brain_BB_v_Bb"])
obp.qlf.Brain_BB_v_Bb.TT <- topTags(obp.qlf.Brain_BB_v_Bb,p.value = 1,n=100000)
obp.qlf.Brain_BB_v_Bb.TT.sig <- topTags(obp.qlf.Brain_BB_v_Bb,p.value = 0.01,n=100000)$table
obp.qlf.Brain_BB_v_Bb.TT.names <- row.names(obp.qlf.Brain_BB_v_Bb.TT$table)
obp.qlf.Ovary_BB_v_Bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Ovary_BB_v_Bb"])
obp.qlf.Ovary_BB_v_Bb.TT <- topTags(obp.qlf.Ovary_BB_v_Bb,p.value = 1,n=100000)
obp.qlf.Ovary_BB_v_Bb.TT.sig <- topTags(obp.qlf.Ovary_BB_v_Bb,p.value = 0.01,n=100000)$table
obp.qlf.Ovary_BB_v_Bb.TT.names <- row.names(obp.qlf.Ovary_BB_v_Bb.TT$table)
obp.qlf.Brain_BB_v_bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Brain_BB_v_bb"])
obp.qlf.Brain_BB_v_bb.TT <- topTags(obp.qlf.Brain_BB_v_bb,p.value = 1,n=100000)
obp.qlf.Brain_BB_v_bb.TT.sig <- topTags(obp.qlf.Brain_BB_v_bb,p.value = 0.01,n=100000)$table
obp.qlf.Brain_BB_v_bb.TT.names <- row.names(obp.qlf.Brain_BB_v_bb.TT$table)
obp.qlf.Ovary_BB_v_bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Ovary_BB_v_bb"])
obp.qlf.Ovary_BB_v_bb.TT <- topTags(obp.qlf.Ovary_BB_v_bb,p.value = 1,n=100000)
obp.qlf.Ovary_BB_v_bb.TT.sig <- topTags(obp.qlf.Ovary_BB_v_bb,p.value = 0.01,n=100000)$table
obp.qlf.Ovary_BB_v_bb.TT.names <- row.names(obp.qlf.Ovary_BB_v_bb.TT$table)
## Now the GLM version of the analysis
y_obp_glm <- DGEList(x_OBP,samples=glm_design)
## Filter out low count genes
y_obp_glm <- y_obp_glm[keep_OBP, , keep.lib.sizes=FALSE]
### Normalize samples by library size
y_obp_glm <- calcNormFactors(y_obp_glm)
## Begin Computing Differential Expression
y_obp_glm <- estimateDisp(y_obp_glm,glm_design)
y_obp_glm <- estimateGLMCommonDisp(y_obp_glm, glm_design)
y_obp_glm <- estimateGLMTrendedDisp(y_obp_glm, glm_design)
y_obp_glm <- estimateGLMTagwiseDisp(y_obp_glm, glm_design)
## QLM Fit and DE analysis
## Note: For more conservative estimates, use glmQLFTest vs. glmLRT
fit_obp_glm <- glmQLFit(y_obp_glm,glm_design)
## Compute Differential expression based on number of supergene alleles
## DE Analysis for Paper
obp.QLF.BBvBb <- glmQLFTest(fit_obp_glm,coef="GenotypeBb")
obp.QLF.BBvBb_TT <- topTags(obp.QLF.BBvBb,n=Inf)
obp.QLF.BBvBb_TT_sig <- topTags(obp.QLF.BBvBb_TT,p.value = 0.01,n=Inf)$table
obp.QLF.BBvbb <- glmQLFTest(fit_obp_glm,coef="Genotypebb")
obp.QLF.BBvbb_TT <- topTags(obp.QLF.BBvbb,n=Inf)
obp.QLF.BBvbb_TT_sig <- topTags(obp.QLF.BBvbb_TT,p.value = 0.01,n=Inf)$table
## Compute differential expression based on social form
obp.QLF.SF <- glmQLFTest(fit_obp_glm,contrast=makeContrasts(Monogyne - Polygyne, levels=glm_design))
obp.QLF.SF_TT <- topTags(obp.QLF.SF,n=Inf)
obp.QLF.SF_TT_sig <- topTags(obp.QLF.SF_TT,p.value = 0.01,n=Inf)$table
## Reorder the dataframes
obp.qlf.Brain_BB_v_Bb_order <- obp.qlf.Brain_BB_v_Bb.TT$table[ order(row.names(obp.qlf.Brain_BB_v_Bb.TT$table)), ]
obp.qlf.Ovary_BB_v_Bb_order <- obp.qlf.Ovary_BB_v_Bb.TT$table[ order(row.names(obp.qlf.Ovary_BB_v_Bb.TT$table)), ]
obp.qlf.Brain_BB_v_bb_order <- obp.qlf.Brain_BB_v_bb.TT$table[ order(row.names(obp.qlf.Brain_BB_v_bb.TT$table)), ]
obp.qlf.Ovary_BB_v_bb_order <- obp.qlf.Ovary_BB_v_bb.TT$table[ order(row.names(obp.qlf.Ovary_BB_v_bb.TT$table)), ]
obp.qlf.Core_BB_v_Bb_order <- obp.QLF.BBvBb_TT$table[ order(row.names(obp.QLF.BBvBb_TT$table)), ]
obp.qlf.Core_BB_v_bb_order <- obp.QLF.BBvbb_TT$table[ order(row.names(obp.QLF.BBvbb_TT$table)), ]
obp.qlf.SF_order <- obp.QLF.SF_TT$table[ order(row.names(obp.QLF.SF_TT$table)), ]
#### Generate FDR heatmap (Figure S4B)
DEG_heatmap <- data.frame(rep(x = 0, 27))
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_Bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_Bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Core_BB_v_Bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Core_BB_v_bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.SF_order$FDR)
row.names(DEG_heatmap) <- row.names(obp.qlf.Core_BB_v_bb_order)
DEG_heatmap <- DEG_heatmap[,2:8]
colnames(DEG_heatmap) <- c("Brain_BB_v_Bb","Ovary_BB_v_Bb","Brain_BB_v_bb","Ovary_BB_v_bb","Core_BB_v_Bb","Core_BB_v_bb","Core_SocialForm")
# create color palette
col.pal <- brewer.pal(9,"YlOrRd")
hm.parameters <- list(DEG_heatmap,
color = col.pal,
scale = "none",
kmeans_k = NA,
show_rownames = T, show_colnames = T,breaks=c(0,0.001,0.01,0.05,0.1,0.2,0.5,1),display_numbers=TRUE,number_format="%.3f",
main = "Solenopsis invicta OBP DEG Significance",
clustering_method = "complete",
cluster_rows = FALSE, cluster_cols = FALSE,
clustering_distance_rows = "correlation",
clustering_distance_cols = "correlation")
do.call("pheatmap", hm.parameters)
#### Generate LogFC heatmap (Figure S4A)
DEG_heatmap <- data.frame(rep(x = 0, 27))
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_Bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_Bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,-1*obp.qlf.Core_BB_v_Bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,-1*obp.qlf.Core_BB_v_bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.SF_order$logFC)
row.names(DEG_heatmap) <- row.names(obp.qlf.Core_BB_v_bb_order)
DEG_heatmap <- DEG_heatmap[,2:8]
colnames(DEG_heatmap) <- c("Brain_BB_v_Bb","Ovary_BB_v_Bb","Brain_BB_v_bb","Ovary_BB_v_bb","Core_BB_v_Bb","Core_BB_v_bb","Core_SocialForm")
# create color palette
col.pal <- brewer.pal(9,"RdBu")
hm.parameters <- list(DEG_heatmap,
color = col.pal,
scale = "none",
kmeans_k = NA,
show_rownames = T, show_colnames = T,display_numbers=TRUE,number_format="%.3f",
main = "Solenopsis invicta OBP DEG logFC",
clustering_method = "complete",
cluster_rows = FALSE, cluster_cols = FALSE,
clustering_distance_rows = "correlation",
clustering_distance_cols = "correlation")
do.call("pheatmap", hm.parameters)
## Here we perform some additional analyses to address reviewer comments
## Pairwise Bb vs. bb comparisons
## make new volcano plots
make_volcano(pw.qlf.Ovary_Bb_v_bb,"Ovary SB/Sb vs. Sb/Sb")
## [1] 0
## [1] 0
## [1] 0
## [1] 2
make_volcano(pw.qlf.Brain_Bb_v_bb,"Brain SB/Sb vs. Sb/Sb")
## [1] 0
## [1] 1
## [1] 0
## [1] 8
## Warning: Removed 1 rows containing missing values (geom_point).
# tissue-specific UpSet plots
Brain_Bb_bb <- temp1[temp1$Row.names %in% row.names(pw.qlf.Brain_Bb_v_bb.TT.sig),]
Brain_Bb_bb
## Row.names chr start end LOC_ID
## 1756 gene11577 lg16 17403499 17404920 LOC105202812
## 2000 gene11797 lg16 10122809 10131987 LOC105203092
## 5281 gene1475 <NA> NA NA LOC105206526
## 6583 gene177 lg16 10894386 10899201 LOC105207492
## 7431 gene2532 NW_011795606.1 12450 15077 LOC105192930
## 7638 gene2719 lg16 12149625 12172468 LOC105193135
## 13926 gene8379 lg16 9144321 9146201 LOC105199321
## 13943 gene8394 lg16 9317199 9319117 LOC105199336
## 13954 gene8403 lg16 9399971 9406551 LOC105199343
## Description
## 1756 pre-mRNA-splicing factor Slu7-like
## 2000 uncharacterized LOC105203092
## 5281 dopamine receptor 1
## 6583 uncharacterized LOC105207492, transcript variant X2
## 7431 <NA>
## 7638 uncharacterized LOC105193135, transcript variant X1
## 13926 copper homeostasis protein cutC homolog
## 13943 polyadenylate-binding protein-interacting protein 2, transcript variant X1
## 13954 uncharacterized LOC105199343, transcript variant X2
## Brain_Bb_logFC Brain_Bb_logCPM Brain_Bb_FDR Brain_bb_logFC
## 1756 1.1619521 5.2088087 2.658828e-01 7.079971
## 2000 0.7540459 3.8182881 1.879356e-01 2.187071
## 5281 0.3992948 6.0856020 8.166661e-01 2.567231
## 6583 1.1214292 6.6827817 3.003548e-01 5.145919
## 7431 0.1540111 0.2181494 9.936302e-01 5.850681
## 7638 -1.4498957 5.6683670 2.938347e-09 -2.427515
## 13926 1.0939644 3.1372041 6.277966e-01 5.224093
## 13943 1.3048902 4.4368701 9.350473e-03 3.141716
## 13954 0.6340571 5.8291791 9.367098e-02 2.141782
## Brain_bb_logCPM Brain_bb_FDR Ovary_Bb_logFC Ovary_Bb_logCPM Ovary_Bb_FDR
## 1756 5.2088087 1.321374e-14 0.8236580 5.2088087 0.8128688802
## 2000 3.8182881 1.263121e-08 0.5140757 3.8182881 0.7934655800
## 5281 6.0856020 6.245568e-09 0.1622900 6.0856020 1.0000000000
## 6583 6.6827817 1.782271e-11 1.0639291 6.6827817 0.4907562039
## 7431 0.2181494 9.578921e-04 0.6680116 0.2181494 1.0000000000
## 7638 5.6683670 1.820967e-17 -0.9305337 5.6683670 0.0003210017
## 13926 3.1372041 1.314212e-07 0.9249539 3.1372041 0.9934274078
## 13943 4.4368701 2.558356e-11 0.5737414 4.4368701 0.8049172978
## 13954 5.8291791 2.090814e-12 0.3874658 5.8291791 0.7635615783
## Ovary_bb_logFC Ovary_bb_logCPM Ovary_bb_FDR Multifactorial_Bb_logFC
## 1756 3.6876028 5.2088087 7.648926e-08 -1.1524809
## 2000 1.2514333 3.8182881 1.513121e-03 -0.6278227
## 5281 1.3519623 6.0856020 4.270179e-03 -0.5431052
## 6583 2.0739424 6.6827817 1.354704e-03 -0.9569147
## 7431 1.5059072 0.2181494 4.742832e-01 -0.5057420
## 7638 -1.4064056 5.6683670 1.349339e-08 1.3821503
## 13926 3.6134826 3.1372041 8.623034e-05 -1.1489277
## 13943 1.7367393 4.4368701 6.828259e-05 -1.1488844
## 13954 0.7263317 5.8291791 1.436868e-02 -0.5882147
## Multifactorial_Bb_logCPM Multifactorial_Bb_FDR Multifactorial_bb_logFC
## 1756 5.2088078 2.350646e-01 -7.070458
## 2000 3.8182934 3.434857e-01 -2.060850
## 5281 6.0856036 6.506031e-01 -2.711042
## 6583 6.6827812 4.304093e-01 -4.981407
## 7431 0.2182094 9.554528e-01 -6.202680
## 7638 5.6683667 6.315581e-10 2.359770
## 13926 3.1372027 5.677740e-01 -5.279122
## 13943 4.4368712 2.041881e-02 -2.985714
## 13954 5.8291794 1.057792e-01 -2.095939
## Multifactorial_bb_logCPM Multifactorial_bb_FDR ASE_logFC ASE_logCPM
## 1756 5.2088078 4.574738e-15 7.5278988 10.579215
## 2000 3.8182934 1.866400e-08 1.1521056 10.994312
## 5281 6.0856036 5.897525e-10 -0.5645447 9.513952
## 6583 6.6827812 1.538864e-11 NA NA
## 7431 0.2182094 3.582084e-04 NA NA
## 7638 5.6683667 2.238215e-19 -0.2689870 13.754954
## 13926 3.1372027 4.445164e-08 NA NA
## 13943 4.4368712 4.375044e-11 1.8420198 10.398021
## 13954 5.8291794 5.773635e-13 1.8153050 10.936319
## ASE_FDR
## 1756 2.196734e-07
## 2000 7.095488e-03
## 5281 5.854976e-01
## 6583 NA
## 7431 NA
## 7638 3.842147e-01
## 13926 NA
## 13943 4.505039e-04
## 13954 3.733954e-06
Brain_all_sg_comps <- list(Brain_BB_Bb=row.names(pw.qlf.Brain_BB_v_Bb.TT.sig),
Brain_BB_bb=row.names(pw.qlf.Brain_BB_v_bb.TT.sig),
Brain_Bb_bb=row.names(pw.qlf.Brain_Bb_v_bb.TT.sig))
upset(fromList(Brain_all_sg_comps),nsets=3,order.by = "freq",text.scale = 1.4)
Ovary_Bb_bb <- temp1[temp1$Row.names %in% row.names(pw.qlf.Ovary_Bb_v_bb.TT.sig),]
Ovary_Bb_bb
## Row.names chr start end LOC_ID
## 1756 gene11577 lg16 17403499 17404920 LOC105202812
## 1763 gene11583 lg16 17477820 17479836 LOC105202816
## Description Brain_Bb_logFC Brain_Bb_logCPM
## 1756 pre-mRNA-splicing factor Slu7-like 1.161952 5.208809
## 1763 <NA> 0.000000 6.608818
## Brain_Bb_FDR Brain_bb_logFC Brain_bb_logCPM Brain_bb_FDR Ovary_Bb_logFC
## 1756 0.2658828 7.079971 5.208809 1.321374e-14 0.823658
## 1763 1.0000000 -1.033023 6.608818 7.615750e-01 1.784655
## Ovary_Bb_logCPM Ovary_Bb_FDR Ovary_bb_logFC Ovary_bb_logCPM Ovary_bb_FDR
## 1756 5.208809 0.8128689 3.687603 5.208809 7.648926e-08
## 1763 6.608818 0.1662591 9.235454 6.608818 9.057535e-11
## Multifactorial_Bb_logFC Multifactorial_Bb_logCPM Multifactorial_Bb_FDR
## 1756 -1.15248093 5.208808 0.2350646
## 1763 -0.04708812 6.608817 1.0000000
## Multifactorial_bb_logFC Multifactorial_bb_logCPM Multifactorial_bb_FDR
## 1756 -7.070458 5.208808 4.574738e-15
## 1763 0.985932 6.608817 6.989363e-01
## ASE_logFC ASE_logCPM ASE_FDR
## 1756 7.527899 10.57921 2.196734e-07
## 1763 NA NA NA
Ovary_all_sg_comps <- list(Ovary_BB_Bb=row.names(pw.qlf.Ovary_BB_v_Bb.TT.sig),
Ovary_BB_bb=row.names(pw.qlf.Ovary_BB_v_bb.TT.sig),
Ovary_Bb_bb=row.names(pw.qlf.Ovary_Bb_v_bb.TT.sig))
upset(fromList(Ovary_all_sg_comps),nsets=3,order.by = "freq",text.scale = 1.4)
## GLM Bb vs. bb comparisons
con.Bb_bb <- makeContrasts(GenotypeBb - Genotypebb, levels=glm_design)
glm.QLF.Bb_bb <- glmQLFTest(fit_glm,contrast=con.Bb_bb)
glm.QLF.Bb_bb_TT <- topTags(glm.QLF.Bb_bb,n=Inf)
glm.QLF.Bb_bb_TT_sig <- topTags(glm.QLF.Bb_bb_TT,p.value = 0.01,n=Inf)$table
make_volcano(glm.QLF.Bb_bb,"GLM SB/Sb vs. Sb/Sb")
## [1] 0
## [1] 1
## [1] 0
## [1] 8
## Warning: Removed 1 rows containing missing values (geom_point).
save.image(file = "Molecular_Ecology_Markdown_v2.RData")
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] dplyr_1.0.0 rentrez_1.2.2 rtracklayer_1.42.2
## [4] readxl_1.3.1 randtests_1.0 RColorBrewer_1.1-2
## [7] CircStats_0.2-6 boot_1.3-25 MASS_7.3-51.6
## [10] pheatmap_1.0.12 eulerr_6.1.0 data.table_1.12.8
## [13] topGO_2.34.0 SparseM_1.78 GO.db_3.7.0
## [16] AnnotationDbi_1.44.0 Biobase_2.42.0 graph_1.60.0
## [19] ggplot2_3.3.1 cowplot_1.0.0 karyoploteR_1.8.8
## [22] regioneR_1.14.0 GenomicRanges_1.34.0 GenomeInfoDb_1.18.2
## [25] IRanges_2.16.0 S4Vectors_0.20.1 BiocGenerics_0.28.0
## [28] gridExtra_2.3 magick_2.3 readr_1.3.1
## [31] UpSetR_1.4.0 edgeR_3.24.3 limma_3.38.3
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 ellipsis_0.3.1
## [3] biovizBase_1.30.1 htmlTable_1.13.3
## [5] XVector_0.22.0 base64enc_0.1-3
## [7] dichromat_2.0-0 rstudioapi_0.11
## [9] farver_2.0.3 bit64_0.9-7
## [11] fansi_0.4.1 splines_3.5.1
## [13] knitr_1.28 polyclip_1.10-0
## [15] jsonlite_1.6.1 Formula_1.2-3
## [17] Rsamtools_1.34.1 cluster_2.1.0
## [19] compiler_3.5.1 httr_1.4.1
## [21] backports_1.1.8 assertthat_0.2.1
## [23] Matrix_1.2-18 lazyeval_0.2.2
## [25] cli_2.0.2 formatR_1.7
## [27] acepack_1.4.1 htmltools_0.5.0
## [29] prettyunits_1.1.1 tools_3.5.1
## [31] gtable_0.3.0 glue_1.4.1
## [33] GenomeInfoDbData_1.2.0 Rcpp_1.0.4.6
## [35] cellranger_1.1.0 vctrs_0.3.1
## [37] Biostrings_2.50.2 polylabelr_0.2.0
## [39] xfun_0.14 stringr_1.4.0
## [41] lifecycle_0.2.0 ensembldb_2.6.8
## [43] XML_3.99-0.3 zlibbioc_1.28.0
## [45] scales_1.1.1 BSgenome_1.50.0
## [47] VariantAnnotation_1.28.13 hms_0.5.3
## [49] ProtGenerics_1.14.0 SummarizedExperiment_1.12.0
## [51] AnnotationFilter_1.6.0 yaml_2.2.1
## [53] curl_4.3 memoise_1.1.0
## [55] biomaRt_2.38.0 rpart_4.1-15
## [57] latticeExtra_0.6-28 stringi_1.4.6
## [59] RSQLite_2.2.0 checkmate_2.0.0
## [61] GenomicFeatures_1.34.8 BiocParallel_1.16.6
## [63] rlang_0.4.6 pkgconfig_2.0.3
## [65] matrixStats_0.56.0 bitops_1.0-6
## [67] evaluate_0.14 lattice_0.20-41
## [69] purrr_0.3.4 labeling_0.3
## [71] GenomicAlignments_1.18.1 htmlwidgets_1.5.1
## [73] bit_1.1-15.2 tidyselect_1.1.0
## [75] plyr_1.8.6 magrittr_1.5
## [77] R6_2.4.1 generics_0.0.2
## [79] Hmisc_4.4-0 DelayedArray_0.8.0
## [81] DBI_1.1.0 pillar_1.4.4
## [83] foreign_0.8-72 withr_2.2.0
## [85] survival_3.2-3 RCurl_1.98-1.2
## [87] nnet_7.3-14 tibble_3.0.1
## [89] crayon_1.3.4 utf8_1.1.4
## [91] rmarkdown_2.3 bamsignals_1.14.0
## [93] progress_1.2.2 locfit_1.5-9.4
## [95] grid_3.5.1 blob_1.2.1
## [97] digest_0.6.25 bezier_1.1.2
## [99] munsell_0.5.0